架构之队列缓冲+批处理
引言
“在高并发场景下,队列是流量的缓冲器,批处理是性能的倍增器”
在互联网高并发时代,系统面临着海量请求的瞬间冲击。无论是电商秒杀、社交网络的热点事件,还是金融交易的高峰期,都需要系统具备强大的流量缓冲能力和高效的处理机制。队列缓冲+批处理架构通过将请求进行队列化缓冲,然后采用异步批处理的方式,不仅提升了系统的吞吐量,还保证了系统的稳定性和可扩展性。
队列缓冲+批处理法则强调:在高并发场景下,可以用队列对数据进行缓冲,再通过异步批处理的模式提升性能。这一法则在Netty、消息队列、数据库写入等众多场景中得到了广泛应用和验证。
队列缓冲+批处理架构的核心理念
为什么需要队列缓冲+批处理?
队列缓冲+批处理架构能够有效应对高并发下瞬时流量冲击大、单次处理开销高、组件耦合紧等常见挑战,其价值主要体现在以下几个方面(列表之后给出一段最小实现示意):
- 削峰填谷:通过队列缓冲瞬时流量,平滑处理压力
- 批量优化:减少单次处理开销,提升整体吞吐量
- 异步解耦:降低系统耦合度,提高可维护性
- 资源复用:充分利用系统资源,提升处理效率
- 可靠性增强:通过缓冲机制保证系统稳定性
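下面用一段与具体框架无关的Java代码给出“队列缓冲 + 定时/定量批处理”的最小骨架示意(类名、容量与各参数均为假设值,仅用于说明结构):
// 极简的“队列缓冲 + 批处理”骨架:生产者只负责入队,后台线程按“攒够一批”或“等够一段时间”触发批量处理
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

public class SimpleBatchingQueue<T> {
    private final LinkedBlockingQueue<T> queue = new LinkedBlockingQueue<>(100_000); // 有界队列,起到削峰作用
    private final int batchSize;
    private final long maxWaitMillis;
    private final Consumer<List<T>> batchHandler;

    public SimpleBatchingQueue(int batchSize, long maxWaitMillis, Consumer<List<T>> batchHandler) {
        this.batchSize = batchSize;
        this.maxWaitMillis = maxWaitMillis;
        this.batchHandler = batchHandler;
        Thread worker = new Thread(this::drainLoop, "batch-worker");
        worker.setDaemon(true);
        worker.start();
    }

    // 生产者调用:队列已满时返回false,由调用方决定降级策略(丢弃、落盘、限流等)
    public boolean submit(T item) {
        return queue.offer(item);
    }

    private void drainLoop() {
        List<T> batch = new ArrayList<>(batchSize);
        while (!Thread.currentThread().isInterrupted()) {
            try {
                // 先阻塞等待第一条数据,避免空转
                T first = queue.poll(maxWaitMillis, TimeUnit.MILLISECONDS);
                if (first == null) {
                    continue;
                }
                batch.add(first);
                queue.drainTo(batch, batchSize - 1); // 一次性把队列中已有数据捞成一批
                try {
                    batchHandler.accept(batch);       // 批量处理:批量落库、批量发消息等
                } catch (RuntimeException e) {
                    e.printStackTrace();              // 示意:真实场景应记录失败批次并考虑重试/死信
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                batch.clear();
            }
        }
    }
}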
Netty中的队列缓冲+批处理架构
Netty架构概览
Netty作为高性能网络通信框架,其核心架构充分体现了队列缓冲+批处理的设计思想。
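例如,多个业务线程向同一个EventLoop提交任务时,任务会先进入它内部的任务队列缓冲,再由绑定的单个IO线程依次取出执行,这正是“多生产者、单消费者”的缓冲形态。下面是一段只演示提交方式的示意代码(线程数、任务数均为假设值,并非Netty内部实现):
import io.netty.channel.EventLoop;
import io.netty.channel.nio.NioEventLoopGroup;

public class EventLoopSubmitDemo {
    public static void main(String[] args) throws InterruptedException {
        NioEventLoopGroup group = new NioEventLoopGroup(1); // 只有一个EventLoop,即单消费者
        EventLoop eventLoop = group.next();
        // 多个生产者线程并发提交任务,任务先被缓冲到EventLoop内部的任务队列中
        for (int i = 0; i < 4; i++) {
            final int producerId = i;
            new Thread(() -> {
                for (int j = 0; j < 100; j++) {
                    eventLoop.execute(() ->
                            System.out.println("producer-" + producerId + " 的任务由 "
                                    + Thread.currentThread().getName() + " 执行"));
                }
            }, "producer-" + i).start();
        }
        Thread.sleep(1000);
        group.shutdownGracefully(); // 演示用:实际应用中由框架统一管理生命周期
    }
}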
Netty的MPSC队列实现
Netty使用MPSC(Multiple Producer Single Consumer)队列实现高效的任务缓冲和批处理。
// Netty MPSC队列实现
public class MpscLinkedQueue<T> extends BaseLinkedQueue<T> {
// 使用原子引用实现无锁队列
private final AtomicReference<LinkedQueueNode<T>> producerNode;
private final AtomicReference<LinkedQueueNode<T>> consumerNode;
public MpscLinkedQueue() {
producerNode = new AtomicReference<>();
consumerNode = new AtomicReference<>();
// 初始化哨兵节点
LinkedQueueNode<T> node = newNode();
consumerNode.lazySet(node);
producerNode.lazySet(node);
}
// 无锁入队操作(多生产者)
@Override
public boolean offer(T e) {
if (null == e) {
throw new NullPointerException();
}
final LinkedQueueNode<T> nextNode = newNode(e);
final LinkedQueueNode<T> prevProducerNode = producerNode.getAndSet(nextNode);
// 链接新节点 - 使用有序写保证可见性
prevProducerNode.soNext(nextNode);
return true;
}
// 批量入队操作
public int offerBatch(Collection<T> batch) {
if (batch.isEmpty()) {
return 0;
}
int count = 0;
LinkedQueueNode<T> firstNode = null;
LinkedQueueNode<T> lastNode = null;
// 批量创建节点
for (T item : batch) {
LinkedQueueNode<T> newNode = newNode(item);
if (firstNode == null) {
firstNode = newNode;
}
if (lastNode != null) {
lastNode.soNext(newNode);
}
lastNode = newNode;
count++;
}
// 原子更新生产者节点
final LinkedQueueNode<T> prevProducerNode = producerNode.getAndSet(lastNode);
prevProducerNode.soNext(firstNode);
return count;
}
// 出队操作(单消费者,无需同步)
@Override
public T poll() {
final LinkedQueueNode<T> currConsumerNode = consumerNode.get();
final LinkedQueueNode<T> nextNode = currConsumerNode.lvNext();
if (nextNode != null) {
final T nextValue = nextNode.getAndNullValue();
consumerNode.lazySet(nextNode);
currConsumerNode.soNext(null); // 帮助GC
return nextValue;
}
return null;
}
// 批量出队操作
public List<T> pollBatch(int maxBatchSize) {
List<T> batch = new ArrayList<>(maxBatchSize);
for (int i = 0; i < maxBatchSize; i++) {
T item = poll();
if (item == null) {
break;
}
batch.add(item);
}
return batch;
}
}
// Netty EventLoop中的批处理实现
public abstract class SingleThreadEventLoop extends SingleThreadEventExecutor implements EventLoop {
// 使用MPSC队列实现任务缓冲
private final Queue<Runnable> taskQueue;
// 批处理配置
private static final int MAX_BATCH_SIZE = 1024;
private static final int MAX_BATCH_TIME = 1000000; // 1ms in nanoseconds
public SingleThreadEventLoop(EventLoopGroup parent, Executor executor, boolean addTaskWakesUp) {
super(parent, executor, addTaskWakesUp);
this.taskQueue = newTaskQueue();
}
// 批处理任务执行
@Override
protected void run() {
for (;;) {
Runnable task = takeTask();
if (task != null) {
task.run();
updateLastExecutionTime();
}
// 批处理优化:连续处理多个任务
if (task != null) {
processBatchTasks();
}
if (confirmShutdown()) {
break;
}
}
}
// 批量处理任务
private void processBatchTasks() {
int processed = 0;
long startTime = System.nanoTime();
// 批量处理任务,直到达到批次大小或时间限制
while (processed < MAX_BATCH_SIZE) {
long currentTime = System.nanoTime();
if (currentTime - startTime > MAX_BATCH_TIME) {
break; // 超过时间限制,退出批处理
}
Runnable task = taskQueue.poll();
if (task == null) {
break; // 没有更多任务,退出批处理
}
safeExecute(task);
processed++;
}
if (processed > 0) {
// 更新统计信息
updateBatchStatistics(processed);
}
}
}
Netty的ByteBuf批处理优化
Netty在内存管理和数据处理中也大量使用了批处理技术。
// Netty ByteBuf批处理写入
public class ByteBufBatchWriter {
private final Channel channel; // 批量写出的目标Channel,performIO()中使用
private final ByteBuf buffer;
private final int maxBatchSize;
private int currentBatchSize;
public ByteBufBatchWriter(Channel channel, ByteBuf buffer, int maxBatchSize) {
this.channel = channel;
this.buffer = buffer;
this.maxBatchSize = maxBatchSize;
this.currentBatchSize = 0;
}
// 批量写入整数
public void writeIntBatch(int[] values) {
int remaining = values.length;
int offset = 0;
while (remaining > 0) {
int batchSize = Math.min(remaining, maxBatchSize - currentBatchSize);
// 批量写入
for (int i = 0; i < batchSize; i++) {
buffer.writeInt(values[offset + i]);
}
currentBatchSize += batchSize;
offset += batchSize;
remaining -= batchSize;
// 如果批次满了,执行刷新
if (currentBatchSize >= maxBatchSize) {
flush();
}
}
}
// 批量写入字节数组
public void writeBytesBatch(List<byte[]> byteArrays) {
// 计算总大小
int totalSize = byteArrays.stream().mapToInt(arr -> arr.length).sum();
// 确保缓冲区有足够的空间
buffer.ensureWritable(totalSize);
// 批量写入
for (byte[] arr : byteArrays) {
buffer.writeBytes(arr);
}
currentBatchSize += byteArrays.size();
// 检查是否需要刷新
if (currentBatchSize >= maxBatchSize) {
flush();
}
}
// 刷新缓冲区
public void flush() {
if (currentBatchSize > 0) {
// 执行实际的IO操作
performIO();
// 重置批次计数
currentBatchSize = 0;
// 清空缓冲区
buffer.clear();
}
}
private void performIO() {
// 执行批量IO操作
if (buffer.readableBytes() > 0) {
// 这里会调用底层的Channel进行批量写入
channel.writeAndFlush(buffer.slice());
}
}
}
// Netty ChannelPipeline中的批处理
public class BatchHandler extends ChannelDuplexHandler {
private final Queue<Object> writeBatch = new ArrayDeque<>();
private final int maxBatchSize;
private final long maxBatchTimeNanos;
private long lastFlushTime;
public BatchHandler(int maxBatchSize, long maxBatchTimeMillis) {
this.maxBatchSize = maxBatchSize;
this.maxBatchTimeNanos = TimeUnit.MILLISECONDS.toNanos(maxBatchTimeMillis);
this.lastFlushTime = System.nanoTime();
}
@Override
public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception {
// 添加到批次队列(示例未保留promise,实际实现应在批量写出完成后统一完成对应的promise)
writeBatch.offer(msg);
// 检查是否需要刷新
if (shouldFlush()) {
flushBatch(ctx);
}
}
@Override
public void flush(ChannelHandlerContext ctx) throws Exception {
flushBatch(ctx);
ctx.flush();
}
private boolean shouldFlush() {
// 基于批次大小
if (writeBatch.size() >= maxBatchSize) {
return true;
}
// 基于时间间隔
long currentTime = System.nanoTime();
if (currentTime - lastFlushTime >= maxBatchTimeNanos) {
return true;
}
return false;
}
private void flushBatch(ChannelHandlerContext ctx) {
if (writeBatch.isEmpty()) {
return;
}
List<Object> batch = new ArrayList<>();
Object msg;
// 收集批次数据
while ((msg = writeBatch.poll()) != null) {
batch.add(msg);
if (batch.size() >= maxBatchSize) {
break;
}
}
// 批量处理
if (!batch.isEmpty()) {
processBatch(ctx, batch);
lastFlushTime = System.nanoTime();
}
}
private void processBatch(ChannelHandlerContext ctx, List<Object> batch) {
// 批量处理逻辑
if (batch.size() == 1) {
// 单个消息直接处理
ctx.write(batch.get(0));
} else {
// 多个消息批量处理
BatchMessage batchMsg = new BatchMessage(batch);
ctx.write(batchMsg);
}
}
}
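上面的BatchHandler可以像普通Handler一样挂到ChannelPipeline中使用。下面给出一个启动示意(端口、批次大小与刷新间隔均为假设值):
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;

public class BatchServer {
    public static void main(String[] args) throws InterruptedException {
        NioEventLoopGroup boss = new NioEventLoopGroup(1);
        NioEventLoopGroup worker = new NioEventLoopGroup();
        ServerBootstrap bootstrap = new ServerBootstrap();
        bootstrap.group(boss, worker)
                .channel(NioServerSocketChannel.class)
                .childHandler(new ChannelInitializer<SocketChannel>() {
                    @Override
                    protected void initChannel(SocketChannel ch) {
                        // 最多攒64条消息、或距上次刷新超过5ms时触发一次批量写出
                        ch.pipeline().addLast(new BatchHandler(64, 5));
                    }
                });
        bootstrap.bind(8080).sync();
    }
}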
消息队列中的队列缓冲+批处理架构
消息队列架构设计
消息队列是队列缓冲+批处理架构的典型应用,通过消息缓冲和批量处理实现高性能。
Kafka的批处理架构
Apache Kafka在设计和实现中大量使用了批处理技术来提升性能。
// Kafka生产者批处理实现
public class KafkaBatchProducer {
private final KafkaProducer<String, Object> producer;
private final int batchSize;
private final long lingerMs;
public KafkaBatchProducer(Properties props, int batchSize, long lingerMs) {
// 配置批处理参数
props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs);
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "lz4");
props.put(ProducerConfig.ACKS_CONFIG, "1");
this.producer = new KafkaProducer<>(props);
this.batchSize = batchSize;
this.lingerMs = lingerMs;
}
// 批量发送消息
public void sendBatch(List<ProducerRecord<String, Object>> records) {
List<CompletableFuture<RecordMetadata>> futures = new ArrayList<>();
for (ProducerRecord<String, Object> record : records) {
CompletableFuture<RecordMetadata> future = new CompletableFuture<>();
producer.send(record, (metadata, exception) -> {
if (exception != null) {
future.completeExceptionally(exception);
} else {
future.complete(metadata);
}
});
futures.add(future);
}
// 等待所有消息发送完成(个别消息失败时不在此处抛出,由下面的统计逻辑处理)
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).exceptionally(ex -> null).join();
// 统计发送结果
long successCount = futures.stream()
.filter(f -> !f.isCompletedExceptionally())
.count();
log.info("批量发送完成: total={}, success={}", records.size(), successCount);
}
// 异步批量发送
public CompletableFuture<List<RecordMetadata>> sendBatchAsync(List<ProducerRecord<String, Object>> records) {
List<CompletableFuture<RecordMetadata>> futures = new ArrayList<>();
for (ProducerRecord<String, Object> record : records) {
CompletableFuture<RecordMetadata> future = new CompletableFuture<>();
producer.send(record, (metadata, exception) -> {
if (exception != null) {
future.completeExceptionally(exception);
} else {
future.complete(metadata);
}
});
futures.add(future);
}
// 组合所有future
return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
.thenApply(v -> futures.stream()
.map(CompletableFuture::join)
.collect(Collectors.toList()));
}
}
// Kafka消费者批处理实现
public class KafkaBatchConsumer {
private final KafkaConsumer<String, Object> consumer;
private final int maxPollRecords;
private final long maxPollIntervalMs;
public KafkaBatchConsumer(Properties props, Collection<String> topics, int maxPollRecords, long maxPollIntervalMs) {
// 配置批处理参数
props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, (int) maxPollIntervalMs);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
this.consumer = new KafkaConsumer<>(props);
this.maxPollRecords = maxPollRecords;
this.maxPollIntervalMs = maxPollIntervalMs;
// 先订阅目标主题,否则后续poll()会抛出IllegalStateException
this.consumer.subscribe(topics);
}
// 批量消费消息
public void consumeBatch() {
try {
while (true) {
// 批量拉取消息
ConsumerRecords<String, Object> records = consumer.poll(Duration.ofMillis(100));
if (records.isEmpty()) {
continue;
}
log.info("拉取到消息批次: count={}", records.count());
// 按分区组织消息
Map<TopicPartition, List<ConsumerRecord<String, Object>>> partitionRecords =
new HashMap<>();
for (ConsumerRecord<String, Object> record : records) {
TopicPartition partition = new TopicPartition(record.topic(), record.partition());
partitionRecords.computeIfAbsent(partition, k -> new ArrayList<>())
.add(record);
}
// 并行处理各分区的消息批次
List<CompletableFuture<Void>> futures = new ArrayList<>();
for (Map.Entry<TopicPartition, List<ConsumerRecord<String, Object>>> entry :
partitionRecords.entrySet()) {
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
processBatch(entry.getKey(), entry.getValue());
});
futures.add(future);
}
// 等待所有批次处理完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
// 批量提交偏移量
commitOffsets(partitionRecords);
log.info("消息批次处理完成: total={}", records.count());
}
} catch (Exception e) {
log.error("批量消费异常", e);
} finally {
consumer.close();
}
}
// 批量处理消息
private void processBatch(TopicPartition partition,
List<ConsumerRecord<String, Object>> records) {
if (records.isEmpty()) {
return;
}
log.info("处理消息批次: partition={}, count={}", partition, records.size());
try {
// 1. 数据预处理
List<ProcessedRecord> processedRecords = preprocessRecords(records);
// 2. 批量业务处理
BatchProcessResult result = batchProcess(processedRecords);
// 3. 结果后处理
postprocessResults(result);
// 4. 记录处理偏移量
long lastOffset = records.get(records.size() - 1).offset();
recordProcessedOffset(partition, lastOffset);
log.info("消息批次处理成功: partition={}, count={}, lastOffset={}",
partition, records.size(), lastOffset);
} catch (Exception e) {
log.error("消息批次处理失败: partition={}, count={}",
partition, records.size(), e);
// 处理失败的消息
handleBatchFailure(partition, records, e);
}
}
// 批量提交偏移量
private void commitOffsets(Map<TopicPartition, List<ConsumerRecord<String, Object>>>
partitionRecords) {
Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
for (Map.Entry<TopicPartition, List<ConsumerRecord<String, Object>>> entry :
partitionRecords.entrySet()) {
TopicPartition partition = entry.getKey();
List<ConsumerRecord<String, Object>> records = entry.getValue();
if (!records.isEmpty()) {
// 提交最后一条消息的下一个偏移量
long lastOffset = records.get(records.size() - 1).offset();
offsets.put(partition, new OffsetAndMetadata(lastOffset + 1));
}
}
// 批量提交
consumer.commitSync(offsets);
log.info("批量提交偏移量完成: partitions={}", offsets.size());
}
}
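配合上面的构造函数,一个可能的启动方式如下(bootstrap地址、消费组与主题名均为假设值):
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;

public class BatchConsumerBootstrap {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "order-batch-consumer");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        // 订阅假设的主题order-events,每次poll最多拉取500条,两次poll之间最长允许5分钟处理时间
        KafkaBatchConsumer batchConsumer =
                new KafkaBatchConsumer(props, Collections.singletonList("order-events"), 500, 300_000L);
        batchConsumer.consumeBatch();
    }
}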
RabbitMQ的批处理优化
RabbitMQ也提供了多种批处理机制来提升消息处理性能。
// RabbitMQ批处理生产者
public class RabbitBatchProducer {
private final RabbitTemplate rabbitTemplate;
private final int batchSize;
private final long batchTimeout;
// 批处理缓冲区
private final List<Message> batchBuffer = new ArrayList<>();
private long lastBatchTime = System.currentTimeMillis();
public RabbitBatchProducer(RabbitTemplate rabbitTemplate, int batchSize, long batchTimeout) {
this.rabbitTemplate = rabbitTemplate;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
// 启动批处理定时器
startBatchTimer();
}
// 发送单条消息(可能触发批处理)
public void sendMessage(String exchange, String routingKey, Object payload) {
Message message = MessageBuilder.withBody(serialize(payload))
.setContentType("application/json")
.setMessageId(UUID.randomUUID().toString())
.setTimestamp(new Date())
.build();
synchronized (batchBuffer) {
batchBuffer.add(message);
// 检查是否需要立即发送批次
if (batchBuffer.size() >= batchSize) {
flushBatch(exchange, routingKey);
}
}
}
// 批量发送消息
public void sendBatch(String exchange, String routingKey, List<Object> payloads) {
List<Message> messages = payloads.stream()
.map(payload -> MessageBuilder.withBody(serialize(payload))
.setContentType("application/json")
.setMessageId(UUID.randomUUID().toString())
.setTimestamp(new Date())
.build())
.collect(Collectors.toList());
// 将大批次拆分为多个小批次
List<List<Message>> batches = splitIntoBatches(messages, batchSize);
for (List<Message> batch : batches) {
sendBatchMessage(exchange, routingKey, batch);
}
}
// 发送批量消息
private void sendBatchMessage(String exchange, String routingKey, List<Message> messages) {
if (messages.isEmpty()) {
return;
}
// 创建批量消息
BatchMessage batchMessage = new BatchMessage(messages);
Message batchMsg = MessageBuilder.withBody(serialize(batchMessage))
.setContentType("application/x-java-serialized-batch")
.setHeader("X-Batch-Size", messages.size())
.setHeader("X-Batch-Id", UUID.randomUUID().toString())
.build();
rabbitTemplate.send(exchange, routingKey, batchMsg);
log.info("批量消息发送完成: exchange={}, routingKey={}, count={}",
exchange, routingKey, messages.size());
}
// 定时批处理
private void startBatchTimer() {
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
synchronized (batchBuffer) {
long currentTime = System.currentTimeMillis();
// 检查是否超时需要发送
if (!batchBuffer.isEmpty() &&
(currentTime - lastBatchTime >= batchTimeout ||
batchBuffer.size() >= batchSize)) {
flushBatch("batch.exchange", "batch.routing.key");
}
}
}, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS);
}
// 刷新批次
private void flushBatch(String exchange, String routingKey) {
if (batchBuffer.isEmpty()) {
return;
}
List<Message> batch = new ArrayList<>(batchBuffer);
batchBuffer.clear();
lastBatchTime = System.currentTimeMillis();
sendBatchMessage(exchange, routingKey, batch);
}
}
// RabbitMQ批处理消费者
public class RabbitBatchConsumer {
private final RabbitTemplate rabbitTemplate;
private final int batchSize;
private final long batchTimeout;
public RabbitBatchConsumer(RabbitTemplate rabbitTemplate, int batchSize, long batchTimeout) {
this.rabbitTemplate = rabbitTemplate;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
}
// 批量监听消息
@RabbitListener(queues = "batch.queue", containerFactory = "batchContainerFactory")
public void handleBatchMessages(List<Message> messages) {
if (messages.isEmpty()) {
return;
}
log.info("批量接收消息: count={}", messages.size());
try {
// 1. 消息预处理
List<ProcessedMessage> processedMessages = preprocessMessages(messages);
// 2. 批量业务处理
BatchProcessResult result = processMessageBatch(processedMessages);
// 3. 批量确认
acknowledgeBatchMessages(messages, result);
log.info("批量消息处理完成: total={}, success={}, failed={}",
messages.size(), result.getSuccessCount(), result.getFailureCount());
} catch (Exception e) {
log.error("批量消息处理异常", e);
// 处理失败的消息
handleBatchFailure(messages, e);
}
}
// 批量处理消息
private BatchProcessResult processMessageBatch(List<ProcessedMessage> messages) {
BatchProcessResult result = new BatchProcessResult();
// 按消息类型分组
Map<String, List<ProcessedMessage>> groupedMessages = messages.stream()
.collect(Collectors.groupingBy(ProcessedMessage::getMessageType));
// 并行处理不同类型的消息
List<CompletableFuture<Void>> futures = new ArrayList<>();
for (Map.Entry<String, List<ProcessedMessage>> entry : groupedMessages.entrySet()) {
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
processMessageGroup(entry.getKey(), entry.getValue(), result);
});
futures.add(future);
}
// 等待所有处理完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
return result;
}
// 处理消息分组
private void processMessageGroup(String messageType, List<ProcessedMessage> messages,
BatchProcessResult result) {
try {
switch (messageType) {
case "ORDER_CREATED":
processOrderCreatedBatch(messages, result);
break;
case "INVENTORY_DEDUCTION":
processInventoryDeductionBatch(messages, result);
break;
case "PAYMENT_COMPLETED":
processPaymentCompletedBatch(messages, result);
break;
default:
log.warn("未知消息类型: {}", messageType);
result.addFailures(messages.size());
}
} catch (Exception e) {
log.error("消息分组处理失败: type={}, count={}", messageType, messages.size(), e);
result.addFailures(messages.size());
}
}
}
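上面@RabbitListener中引用的batchContainerFactory需要开启批量投递能力。以下是基于Spring AMQP(假设2.2及以上版本)的一种可能配置示意,批次大小与等待时间均为示例值:
import org.springframework.amqp.rabbit.config.SimpleRabbitListenerContainerFactory;
import org.springframework.amqp.rabbit.connection.ConnectionFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class RabbitBatchConfig {

    @Bean
    public SimpleRabbitListenerContainerFactory batchContainerFactory(ConnectionFactory connectionFactory) {
        SimpleRabbitListenerContainerFactory factory = new SimpleRabbitListenerContainerFactory();
        factory.setConnectionFactory(connectionFactory);
        factory.setBatchListener(true);        // 监听方法以List形式接收一批消息
        factory.setConsumerBatchEnabled(true); // 消费端按数量/超时把多条消息聚合成一批
        factory.setBatchSize(50);              // 每批最多50条
        factory.setReceiveTimeout(1000L);      // 凑不满一批时最多等待1秒
        return factory;
    }
}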
数据库写入中的队列缓冲+批处理架构
数据库批处理架构设计
数据库写入场景是队列缓冲+批处理架构的另一个重要应用领域。
数据库批处理实现
// 数据库批处理写入器
public class DatabaseBatchWriter {
private final DataSource dataSource;
private final int batchSize;
private final long batchTimeout;
private final ScheduledExecutorService scheduler;
// 批处理缓冲区
private final ConcurrentHashMap<String, BatchBuffer> batchBuffers = new ConcurrentHashMap<>();
public DatabaseBatchWriter(DataSource dataSource, int batchSize, long batchTimeout) {
this.dataSource = dataSource;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
this.scheduler = Executors.newScheduledThreadPool(2);
// 启动批处理定时器
startBatchProcessors();
}
// 异步写入数据
public CompletableFuture<Void> writeAsync(String table, Map<String, Object> data) {
CompletableFuture<Void> future = new CompletableFuture<>();
BatchBuffer buffer = batchBuffers.computeIfAbsent(table,
k -> new BatchBuffer(table, batchSize));
buffer.add(data, future);
return future;
}
// 批量插入数据
public int batchInsert(String table, List<Map<String, Object>> records) {
if (records.isEmpty()) {
return 0;
}
String sql = buildBatchInsertSql(table, records.get(0).keySet());
try (Connection conn = dataSource.getConnection();
PreparedStatement pstmt = conn.prepareStatement(sql)) {
conn.setAutoCommit(false);
int count = 0;
for (Map<String, Object> record : records) {
setParameters(pstmt, record);
pstmt.addBatch();
count++;
// 每1000条执行一次批处理
if (count % 1000 == 0) {
int[] results = pstmt.executeBatch();
conn.commit();
log.info("批量插入进度: table={}, count={}", table, count);
}
}
// 执行剩余的批处理
if (count % 1000 != 0) {
int[] results = pstmt.executeBatch();
conn.commit();
}
log.info("批量插入完成: table={}, total={}", table, count);
return count;
} catch (SQLException e) {
log.error("批量插入失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("批量插入失败", e);
}
}
// 批量更新数据
public int batchUpdate(String table, List<Map<String, Object>> records, String keyColumn) {
if (records.isEmpty()) {
return 0;
}
String sql = buildBatchUpdateSql(table, records.get(0).keySet(), keyColumn);
try (Connection conn = dataSource.getConnection();
PreparedStatement pstmt = conn.prepareStatement(sql)) {
conn.setAutoCommit(false);
int count = 0;
for (Map<String, Object> record : records) {
setUpdateParameters(pstmt, record, keyColumn);
pstmt.addBatch();
count++;
// 每500条执行一次批处理
if (count % 500 == 0) {
int[] results = pstmt.executeBatch();
conn.commit();
log.info("批量更新进度: table={}, count={}", table, count);
}
}
// 执行剩余的批处理
if (count % 500 != 0) {
int[] results = pstmt.executeBatch();
conn.commit();
}
log.info("批量更新完成: table={}, total={}", table, count);
return count;
} catch (SQLException e) {
log.error("批量更新失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("批量更新失败", e);
}
}
// 构建批量插入SQL
private String buildBatchInsertSql(String table, Set<String> columns) {
StringBuilder sql = new StringBuilder("INSERT INTO ").append(table).append(" (");
// 列名
String columnNames = String.join(", ", columns);
sql.append(columnNames).append(") VALUES (");
// 占位符
String placeholders = columns.stream()
.map(c -> "?")
.collect(Collectors.joining(", "));
sql.append(placeholders).append(")");
return sql.toString();
}
// 构建批量更新SQL
private String buildBatchUpdateSql(String table, Set<String> columns, String keyColumn) {
StringBuilder sql = new StringBuilder("UPDATE ").append(table).append(" SET ");
// 更新列
String updateColumns = columns.stream()
.filter(col -> !col.equals(keyColumn))
.map(col -> col + " = ?")
.collect(Collectors.joining(", "));
sql.append(updateColumns);
// WHERE条件
sql.append(" WHERE ").append(keyColumn).append(" = ?");
return sql.toString();
}
// 启动批处理器
private void startBatchProcessors() {
// 定时批处理
scheduler.scheduleAtFixedRate(() -> {
processTimeoutBatches();
}, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS);
// 监控批处理缓冲区
scheduler.scheduleAtFixedRate(() -> {
monitorBatchBuffers();
}, 30, 30, TimeUnit.SECONDS);
}
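// startBatchProcessors中定时调用:刷新所有已满或已超时的缓冲区(一种可能的实现示意)
private void processTimeoutBatches() {
for (BatchBuffer buffer : batchBuffers.values()) {
if (buffer.shouldFlush()) {
buffer.flush();
}
}
}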
// 批处理缓冲区
private class BatchBuffer {
private final String table;
private final int maxSize;
private final List<Map<String, Object>> buffer = new ArrayList<>();
private final List<CompletableFuture<Void>> futures = new ArrayList<>();
private long lastFlushTime = System.currentTimeMillis();
public BatchBuffer(String table, int maxSize) {
this.table = table;
this.maxSize = maxSize;
}
public synchronized void add(Map<String, Object> data, CompletableFuture<Void> future) {
buffer.add(data);
futures.add(future);
// 检查是否需要立即刷新
if (buffer.size() >= maxSize) {
flush();
}
}
public synchronized void flush() {
if (buffer.isEmpty()) {
return;
}
List<Map<String, Object>> currentBuffer = new ArrayList<>(buffer);
List<CompletableFuture<Void>> currentFutures = new ArrayList<>(futures);
buffer.clear();
futures.clear();
lastFlushTime = System.currentTimeMillis();
// 异步执行批处理
CompletableFuture.runAsync(() -> {
try {
int count = batchInsert(table, currentBuffer);
log.info("批处理完成: table={}, count={}", table, count);
// 完成所有future
currentFutures.forEach(future -> future.complete(null));
} catch (Exception e) {
log.error("批处理失败: table={}, count={}", table, currentBuffer.size(), e);
// 异常完成所有future
currentFutures.forEach(future -> future.completeExceptionally(e));
}
});
}
public synchronized boolean shouldFlush() {
return !buffer.isEmpty() &&
(buffer.size() >= maxSize ||
System.currentTimeMillis() - lastFlushTime >= batchTimeout);
}
}
}
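下面给出一种可能的使用方式:业务代码只调用writeAsync写单条记录,攒批与批量落库由DatabaseBatchWriter在后台完成(表名、字段名均为假设值):
import java.util.HashMap;
import java.util.Map;
import javax.sql.DataSource;

public class OrderWriteService {
    private final DatabaseBatchWriter writer;

    public OrderWriteService(DataSource dataSource) {
        // 每攒满500条或每200ms刷一次
        this.writer = new DatabaseBatchWriter(dataSource, 500, 200);
    }

    public void saveOrder(long orderId, long userId, String status) {
        Map<String, Object> row = new HashMap<>();
        row.put("order_id", orderId);
        row.put("user_id", userId);
        row.put("status", status);
        writer.writeAsync("t_order", row)
              .whenComplete((v, e) -> {
                  if (e != null) {
                      e.printStackTrace(); // 示意:实际应记录日志并触发告警
                  }
              });
    }
}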
数据库连接池批处理优化
// 数据库连接池批处理优化
public class PooledBatchProcessor {
private final HikariDataSource dataSource;
private final ThreadLocal<BatchContext> batchContext = new ThreadLocal<>();
public PooledBatchProcessor(HikariConfig config) {
// 优化连接池配置
config.setMaximumPoolSize(50);
config.setMinimumIdle(10);
config.setConnectionTimeout(30000);
config.setIdleTimeout(600000);
config.setMaxLifetime(1800000);
config.setLeakDetectionThreshold(60000);
// 批处理优化
config.addDataSourceProperty("cachePrepStmts", "true");
config.addDataSourceProperty("prepStmtCacheSize", "250");
config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048");
config.addDataSourceProperty("useServerPrepStmts", "true");
config.addDataSourceProperty("rewriteBatchedStatements", "true");
this.dataSource = new HikariDataSource(config);
}
// 批处理上下文
private class BatchContext {
private final Connection connection;
private final Map<String, PreparedStatement> statements = new HashMap<>();
private final Map<String, Integer> batchCounts = new HashMap<>();
private final int batchSize;
public BatchContext(Connection connection, int batchSize) throws SQLException {
this.connection = connection;
this.batchSize = batchSize;
this.connection.setAutoCommit(false);
}
public void addBatch(String sql, Object... parameters) throws SQLException {
PreparedStatement pstmt = statements.computeIfAbsent(sql, k -> {
try {
return connection.prepareStatement(k);
} catch (SQLException e) {
throw new RuntimeException(e);
}
});
// 设置参数
for (int i = 0; i < parameters.length; i++) {
pstmt.setObject(i + 1, parameters[i]);
}
pstmt.addBatch();
// 更新计数
int count = batchCounts.getOrDefault(sql, 0) + 1;
batchCounts.put(sql, count);
// 检查是否需要执行批处理
if (count >= batchSize) {
executeBatch(sql);
}
}
public void executeBatch(String sql) throws SQLException {
PreparedStatement pstmt = statements.get(sql);
if (pstmt != null) {
int[] results = pstmt.executeBatch();
connection.commit();
batchCounts.put(sql, 0);
log.info("批处理执行完成: sql={}, count={}", sql, results.length);
}
}
public void executeAllBatches() throws SQLException {
for (String sql : statements.keySet()) {
executeBatch(sql);
}
}
public void close() {
try {
executeAllBatches();
connection.commit();
} catch (SQLException e) {
try {
connection.rollback();
} catch (SQLException ex) {
log.error("回滚失败", ex);
}
} finally {
try {
for (PreparedStatement pstmt : statements.values()) {
pstmt.close();
}
connection.close();
} catch (SQLException e) {
log.error("关闭连接失败", e);
}
}
}
}
// 开始批处理会话
public BatchSession beginBatchSession(int batchSize) {
try {
Connection connection = dataSource.getConnection();
BatchContext context = new BatchContext(connection, batchSize);
batchContext.set(context);
return new BatchSession(context);
} catch (SQLException e) {
throw new DatabaseException("开始批处理会话失败", e);
}
}
// 批处理会话
public class BatchSession implements AutoCloseable {
private final BatchContext context;
public BatchSession(BatchContext context) {
this.context = context;
}
public void addBatch(String sql, Object... parameters) {
try {
context.addBatch(sql, parameters);
} catch (SQLException e) {
throw new DatabaseException("添加批处理失败", e);
}
}
public void executeBatch(String sql) {
try {
context.executeBatch(sql);
} catch (SQLException e) {
throw new DatabaseException("执行批处理失败", e);
}
}
public void commit() {
try {
context.executeAllBatches();
} catch (SQLException e) {
throw new DatabaseException("提交批处理失败", e);
}
}
@Override
public void close() {
context.close();
batchContext.remove();
}
}
// 批量插入优化
public int optimizedBatchInsert(String table, List<Map<String, Object>> records) {
if (records.isEmpty()) {
return 0;
}
try (BatchSession session = beginBatchSession(1000)) {
// 分批处理
for (int i = 0; i < records.size(); i += 1000) {
int end = Math.min(i + 1000, records.size());
List<Map<String, Object>> batch = records.subList(i, end);
// 按实际批次大小构建多值插入SQL,避免最后一批占位符与参数数量不一致
String sql = buildOptimizedInsertSql(table, records.get(0).keySet(), batch.size());
// 为每批构建参数
Object[] params = buildBatchParameters(batch);
session.addBatch(sql, params);
}
session.commit();
log.info("优化批量插入完成: table={}, count={}", table, records.size());
return records.size();
} catch (Exception e) {
log.error("优化批量插入失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("优化批量插入失败", e);
}
}
// 构建优化的批量插入SQL(MySQL多值插入)
private String buildOptimizedInsertSql(String table, Set<String> columns, int batchSize) {
StringBuilder sql = new StringBuilder("INSERT INTO ").append(table).append(" (");
// 列名
String columnNames = String.join(", ", columns);
sql.append(columnNames).append(") VALUES ");
// 多值插入
String singleValue = "(" + columns.stream().map(c -> "?").collect(Collectors.joining(", ")) + ")";
String allValues = IntStream.range(0, batchSize)
.mapToObj(i -> singleValue)
.collect(Collectors.joining(", "));
sql.append(allValues);
return sql.toString();
}
}
队列缓冲+批处理架构最佳实践
1. 设计原则
// 队列缓冲+批处理设计原则
public class BatchProcessingPrinciples {
/**
* 原则1:合适的批次大小
*/
public void demonstrateBatchSizeOptimization() {
// 不好的做法:批次过大
class OversizedBatch {
private static final int BATCH_SIZE = 10000; // 太大
public void processBatch(List<Data> items) {
// 可能导致内存溢出和长时间阻塞
}
}
// 好的做法:根据场景选择合适批次大小
class OptimizedBatch {
// 网络请求:小批次,减少延迟
private static final int NETWORK_BATCH_SIZE = 10;
// 数据库操作:中等批次,平衡性能和内存
private static final int DATABASE_BATCH_SIZE = 100;
// 文件处理:大批次,提高吞吐量
private static final int FILE_BATCH_SIZE = 1000;
// 动态批次大小调整
public int calculateOptimalBatchSize(int availableMemory,
int itemSize,
int processingTime) {
// 基于可用内存和项目大小计算
int memoryBased = availableMemory / (itemSize * 2);
// 基于处理时间调整
int timeBased = processingTime < 100 ? 100 : 50;
return Math.min(memoryBased, timeBased);
}
}
}
/**
* 原则2:超时机制
*/
public void demonstrateTimeoutMechanism() {
// 不好的做法:没有超时机制
class NoTimeoutBatch {
private final List<Item> buffer = new ArrayList<>();
public void add(Item item) {
buffer.add(item);
if (buffer.size() >= 100) {
flush();
}
// 问题:如果数据量小,可能长时间不刷新
}
}
// 好的做法:双重触发机制
class TimeoutBatch {
private final List<Item> buffer = new ArrayList<>();
private final int batchSize;
private final long timeoutMillis;
private long lastFlushTime = System.currentTimeMillis();
public TimeoutBatch(int batchSize, long timeoutMillis) {
this.batchSize = batchSize;
this.timeoutMillis = timeoutMillis;
// 启动定时刷新
startPeriodicFlush();
}
public void add(Item item) {
synchronized (buffer) {
buffer.add(item);
// 检查是否需要立即刷新
if (shouldFlush()) {
flush();
}
}
}
private boolean shouldFlush() {
return buffer.size() >= batchSize ||
System.currentTimeMillis() - lastFlushTime >= timeoutMillis;
}
private void startPeriodicFlush() {
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
synchronized (buffer) {
if (!buffer.isEmpty() && shouldFlush()) {
flush();
}
}
}, timeoutMillis, timeoutMillis, TimeUnit.MILLISECONDS);
}
}
}
/**
* 原则3:错误处理和重试
*/
public void demonstrateErrorHandling() {
// 不好的做法:简单的错误处理
class SimpleErrorHandling {
public void processBatch(List<Item> items) {
try {
// 批处理逻辑
database.batchInsert(items);
} catch (Exception e) {
log.error("批处理失败", e);
// 问题:没有重试机制,数据丢失
}
}
}
// 好的做法:完善的错误处理
class RobustErrorHandling {
private static final int MAX_RETRY_ATTEMPTS = 3;
private static final long RETRY_DELAY_MS = 1000;
public void processBatch(List<Item> items) {
int attempt = 0;
while (attempt < MAX_RETRY_ATTEMPTS) {
try {
// 尝试批处理
database.batchInsert(items);
return; // 成功,退出
} catch (Exception e) {
attempt++;
log.error("批处理失败,尝试次数: {}", attempt, e);
if (attempt >= MAX_RETRY_ATTEMPTS) {
// 最终失败,处理失败数据
handleFailedItems(items, e);
return;
}
// 等待重试
try {
Thread.sleep(RETRY_DELAY_MS * attempt);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
}
}
private void handleFailedItems(List<Item> items, Exception e) {
// 1. 记录失败日志
logFailedItems(items, e);
// 2. 发送到死信队列
sendToDeadLetterQueue(items);
// 3. 发送告警
sendAlert(items.size(), e);
}
}
}
/**
* 原则4:背压处理
*/
public void demonstrateBackpressureHandling() {
// 不好的做法:无限制的缓冲
class UnboundedBuffer {
private final Queue<Item> queue = new ConcurrentLinkedQueue<>();
public void produce(Item item) {
queue.offer(item); // 可能导致内存溢出
}
}
// 好的做法:有界缓冲和背压
class BackpressureBuffer {
private final BlockingQueue<Item> queue;
private final int capacity;
private final AtomicInteger droppedCount = new AtomicInteger(0);
public BackpressureBuffer(int capacity) {
this.capacity = capacity;
this.queue = new ArrayBlockingQueue<>(capacity);
}
public boolean produce(Item item, long timeout, TimeUnit unit) {
try {
// 尝试放入队列,带超时
boolean offered = queue.offer(item, timeout, unit);
if (!offered) {
// 队列满,执行背压策略
return handleBackpressure(item);
}
return true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return false;
}
}
private boolean handleBackpressure(Item item) {
// 背压策略1:丢弃最老的数据
Item dropped = queue.poll();
if (dropped != null) {
queue.offer(item);
droppedCount.incrementAndGet();
log.warn("背压:丢弃数据,总计丢弃: {}", droppedCount.get());
return true;
}
// 背压策略2:丢弃当前数据
droppedCount.incrementAndGet();
log.warn("背压:丢弃新数据,总计丢弃: {}", droppedCount.get());
return false;
}
}
}
}
2. 性能调优建议
# 队列缓冲+批处理性能调优配置
performance_tuning:
# 批次大小配置
batch_sizes:
network_requests: 10 # 网络请求批次大小
database_operations: 100 # 数据库操作批次大小
file_operations: 1000 # 文件操作批次大小
memory_operations: 10000 # 内存操作批次大小
# 超时配置
timeouts:
batch_timeout_ms: 100 # 批次超时时间
flush_interval_ms: 50 # 刷新间隔
retry_delay_ms: 1000 # 重试延迟
max_retry_attempts: 3 # 最大重试次数
# 内存配置
memory:
max_buffer_size: 100000 # 最大缓冲区大小
buffer_cleanup_threshold: 0.8 # 缓冲区清理阈值
object_pool_size: 1000 # 对象池大小
# 线程池配置
thread_pool:
core_pool_size: 10 # 核心线程数
max_pool_size: 50 # 最大线程数
queue_capacity: 1000 # 队列容量
keep_alive_seconds: 60 # 线程存活时间
# JVM优化
jvm:
heap_size: "4g" # 堆内存大小
gc_type: "G1GC" # 垃圾收集器
gc_max_pause: 200 # GC最大暂停时间
heap_regions_size: "16m" # 堆区域大小
# 数据库批处理优化
database_optimization:
# 连接池配置
hikari:
maximum_pool_size: 50 # 最大连接数
minimum_idle: 10 # 最小空闲连接
connection_timeout: 30000 # 连接超时
idle_timeout: 600000 # 空闲超时
max_lifetime: 1800000 # 最大生命周期
# MySQL优化
mysql:
innodb_buffer_pool_size: "8G" # InnoDB缓冲池
innodb_log_file_size: "2G" # InnoDB日志文件大小
innodb_flush_log_at_trx_commit: 2 # 事务提交刷新策略
query_cache_type: 0 # 查询缓存类型
# PostgreSQL优化
postgresql:
shared_buffers: "2GB" # 共享缓冲区
work_mem: "64MB" # 工作内存
maintenance_work_mem: "256MB" # 维护工作内存
effective_cache_size: "6GB" # 有效缓存大小
# 消息队列批处理优化
message_queue_optimization:
# Kafka配置
kafka:
batch_size: 16384 # 批处理大小
linger_ms: 10 # 延迟发送时间
compression_type: lz4 # 压缩算法
max_request_size: 10485760 # 最大请求大小
buffer_memory: 33554432 # 缓冲区内存
# RabbitMQ配置
rabbitmq:
prefetch_count: 100 # 预取数量
consumer_batch_size: 50 # 消费者批次大小
channel_cache_size: 25 # 通道缓存大小
connection_cache_size: 5 # 连接缓存大小
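上述参数最终要映射到具体代码或组件配置上才会生效。以thread_pool一节为例,对应的线程池构造大致如下(示意,拒绝策略按背压需要选择):
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BatchExecutorConfig {
    // 对应上面 thread_pool 配置:core=10, max=50, queue=1000, keepAlive=60s
    public static ThreadPoolExecutor newBatchExecutor() {
        return new ThreadPoolExecutor(
                10, 50,
                60, TimeUnit.SECONDS,
                new ArrayBlockingQueue<>(1000),
                // 队列打满时由提交线程自己执行任务,对上游形成天然背压,避免无界堆积
                new ThreadPoolExecutor.CallerRunsPolicy());
    }
}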
3. 监控告警配置
# Prometheus队列缓冲+批处理监控配置
groups:
- name: batch_processing_monitoring
rules:
# 批处理队列积压告警
- alert: BatchQueueBacklogHigh
expr: batch_queue_size > 10000
for: 5m
labels:
severity: warning
annotations:
summary: "批处理队列积压严重"
description: "批处理队列 {{ $labels.queue }} 积压数据量 {{ $value }}"
# 批处理延迟告警
- alert: BatchProcessingLatencyHigh
expr: batch_processing_duration_seconds{quantile="0.95"} > 30
for: 3m
labels:
severity: warning
annotations:
summary: "批处理延迟过高"
description: "批处理95分位延迟 {{ $value }}秒"
# 批处理失败率告警
- alert: BatchProcessingFailureRateHigh
expr: rate(batch_processing_failures_total[5m]) / rate(batch_processing_total[5m]) > 0.05
for: 2m
labels:
severity: critical
annotations:
summary: "批处理失败率过高"
description: "批处理失败率 {{ $value | humanizePercentage }}"
# 批处理缓冲区使用率告警
- alert: BatchBufferUsageHigh
expr: batch_buffer_used_bytes / batch_buffer_total_bytes > 0.9
for: 1m
labels:
severity: warning
annotations:
summary: "批处理缓冲区使用率过高"
description: "批处理缓冲区使用率 {{ $value | humanizePercentage }}"
# 批处理重试次数告警
- alert: BatchRetryCountHigh
expr: batch_retry_count > 5
for: 1m
labels:
severity: warning
annotations:
summary: "批处理重试次数过多"
description: "批处理重试次数 {{ $value }}"
# 批处理吞吐量告警
- alert: BatchThroughputLow
expr: rate(batch_processing_total[5m]) < 10
for: 5m
labels:
severity: warning
annotations:
summary: "批处理吞吐量过低"
description: "批处理吞吐量 {{ $value }}/秒"
# 批处理内存使用告警
- alert: BatchMemoryUsageHigh
expr: batch_memory_used_bytes / batch_memory_max_bytes > 0.8
for: 3m
labels:
severity: critical
annotations:
summary: "批处理内存使用率过高"
description: "批处理内存使用率 {{ $value | humanizePercentage }}"
# 批处理线程池饱和告警
- alert: BatchThreadPoolSaturated
expr: batch_thread_pool_active_threads / batch_thread_pool_max_threads > 0.9
for: 2m
labels:
severity: warning
annotations:
summary: "批处理线程池饱和"
description: "批处理线程池使用率 {{ $value | humanizePercentage }}"
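这些告警规则依赖应用侧先把相应指标暴露出来。下面是一个基于Micrometer的示意(假设引入micrometer-registry-prometheus依赖,指标名与标签沿用上面的规则,队列名为假设值):
import io.micrometer.core.instrument.Gauge;
import io.micrometer.prometheus.PrometheusConfig;
import io.micrometer.prometheus.PrometheusMeterRegistry;
import java.util.concurrent.LinkedBlockingQueue;

public class BatchMetrics {
    public static void main(String[] args) {
        PrometheusMeterRegistry registry = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
        LinkedBlockingQueue<Runnable> batchQueue = new LinkedBlockingQueue<>();

        // 暴露告警规则中引用的 batch_queue_size 指标,按队列名打标签
        Gauge.builder("batch_queue_size", batchQueue, LinkedBlockingQueue::size)
                .tag("queue", "order-write")
                .register(registry);

        // registry.scrape() 的文本输出挂到 /metrics 端点即可被 Prometheus 抓取
        System.out.println(registry.scrape());
    }
}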
队列缓冲+批处理架构演进路径
总结
队列缓冲+批处理架构法则是现代高并发系统设计的核心原则之一。通过深入理解队列缓冲的流量调节作用和批处理的性能优化效果,我们能够为不同的业务场景设计出最适合的架构方案,实现系统性能、稳定性和可扩展性的最佳平衡。
核心原则
- 流量缓冲:通过队列缓冲瞬时流量,保护系统免受过载冲击
- 批量优化:减少单次处理开销,提升整体系统吞吐量
- 异步解耦:降低系统组件间的耦合度,提高可维护性
- 动态调整:根据系统负载动态调整批次大小和处理策略
关键技术
- 队列缓冲:MPSC队列、阻塞队列、优先级队列等缓冲机制
- 批处理算法:批量插入、批量更新、批量删除等优化技术
- 异步处理:事件驱动、消息队列、回调机制等异步模式
- 性能监控:实时监控、动态调优、自适应调整等智能化手段
成功要素
- 合理的批次设计:根据业务特点选择合适的批次大小和超时策略
- 完善的错误处理:建立重试机制和失败数据处理流程
- 有效的背压控制:防止系统过载,保证稳定性
- 持续的性能优化:基于监控数据持续调优系统参数
- 容量规划:提前规划系统容量,支持业务增长
队列缓冲+批处理架构不是简单的技术堆砌,而是需要根据业务特征、性能要求、系统复杂度等因素,设计出最适合的缓冲和批处理策略。