架构之队列缓冲+批处理
引言
“在高并发场景下,队列是流量的缓冲器,批处理是性能的倍增器”
在互联网高并发时代,系统面临着海量请求的瞬间冲击。无论是电商秒杀、社交网络的热点事件,还是金融交易的高峰期,都需要系统具备强大的流量缓冲能力和高效的处理机制。队列缓冲+批处理架构通过将请求进行队列化缓冲,然后采用异步批处理的方式,不仅提升了系统的吞吐量,还保证了系统的稳定性和可扩展性。
队列缓冲+批处理法则强调:在高并发场景下,可以用队列对数据进行缓冲,再通过异步批处理的模式提升性能。这一法则在Netty、消息队列、数据库写入等众多场景中得到了广泛应用和验证。
队列缓冲+批处理架构的核心理念
为什么需要队列缓冲+批处理?
队列缓冲+批处理架构能够有效应对高并发下瞬时流量冲击大、单次处理开销高、组件耦合紧等常见挑战,其价值主要体现在以下几个方面(列表之后给出一段最小实现示意):
- 削峰填谷:通过队列缓冲瞬时流量,平滑处理压力
- 批量优化:减少单次处理开销,提升整体吞吐量
- 异步解耦:降低系统耦合度,提高可维护性
- 资源复用:充分利用系统资源,提升处理效率
- 可靠性增强:通过缓冲机制保证系统稳定性
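下面用一段与具体框架无关的Java代码给出“队列缓冲 + 定时/定量批处理”的最小骨架示意(类名、容量与各参数均为假设值,仅用于说明结构):
// 极简的“队列缓冲 + 批处理”骨架:生产者只负责入队,后台线程按“攒够一批”或“等够一段时间”触发批量处理
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

public class SimpleBatchingQueue<T> {
    private final LinkedBlockingQueue<T> queue = new LinkedBlockingQueue<>(100_000); // 有界队列,起到削峰作用
    private final int batchSize;
    private final long maxWaitMillis;
    private final Consumer<List<T>> batchHandler;

    public SimpleBatchingQueue(int batchSize, long maxWaitMillis, Consumer<List<T>> batchHandler) {
        this.batchSize = batchSize;
        this.maxWaitMillis = maxWaitMillis;
        this.batchHandler = batchHandler;
        Thread worker = new Thread(this::drainLoop, "batch-worker");
        worker.setDaemon(true);
        worker.start();
    }

    // 生产者调用:队列已满时返回false,由调用方决定降级策略(丢弃、落盘、限流等)
    public boolean submit(T item) {
        return queue.offer(item);
    }

    private void drainLoop() {
        List<T> batch = new ArrayList<>(batchSize);
        while (!Thread.currentThread().isInterrupted()) {
            try {
                // 先阻塞等待第一条数据,避免空转
                T first = queue.poll(maxWaitMillis, TimeUnit.MILLISECONDS);
                if (first == null) {
                    continue;
                }
                batch.add(first);
                queue.drainTo(batch, batchSize - 1); // 一次性把队列中已有数据捞成一批
                try {
                    batchHandler.accept(batch);       // 批量处理:批量落库、批量发消息等
                } catch (RuntimeException e) {
                    e.printStackTrace();              // 示意:真实场景应记录失败批次并考虑重试/死信
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                batch.clear();
            }
        }
    }
}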
Netty中的队列缓冲+批处理架构
Netty架构概览
Netty作为高性能网络通信框架,其核心架构充分体现了队列缓冲+批处理的设计思想。
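例如,多个业务线程向同一个EventLoop提交任务时,任务会先进入它内部的任务队列缓冲,再由绑定的单个IO线程依次取出执行,这正是“多生产者、单消费者”的缓冲形态。下面是一段只演示提交方式的示意代码(线程数、任务数均为假设值,并非Netty内部实现):
import io.netty.channel.EventLoop;
import io.netty.channel.nio.NioEventLoopGroup;

public class EventLoopSubmitDemo {
    public static void main(String[] args) throws InterruptedException {
        NioEventLoopGroup group = new NioEventLoopGroup(1); // 只有一个EventLoop,即单消费者
        EventLoop eventLoop = group.next();
        // 多个生产者线程并发提交任务,任务先被缓冲到EventLoop内部的任务队列中
        for (int i = 0; i < 4; i++) {
            final int producerId = i;
            new Thread(() -> {
                for (int j = 0; j < 100; j++) {
                    eventLoop.execute(() ->
                            System.out.println("producer-" + producerId + " 的任务由 "
                                    + Thread.currentThread().getName() + " 执行"));
                }
            }, "producer-" + i).start();
        }
        Thread.sleep(1000);
        group.shutdownGracefully(); // 演示用:实际应用中由框架统一管理生命周期
    }
}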
Netty的MPSC队列实现
Netty使用MPSC(Multiple Producer Single Consumer)队列实现高效的任务缓冲和批处理。
// Netty MPSC队列实现
public class MpscLinkedQueue<T> extends BaseLinkedQueue<T> {
// 使用原子引用实现无锁队列
private final AtomicReference<LinkedQueueNode<T>> producerNode;
private final AtomicReference<LinkedQueueNode<T>> consumerNode;
public MpscLinkedQueue() {
producerNode = new AtomicReference<>();
consumerNode = new AtomicReference<>();
// 初始化哨兵节点
LinkedQueueNode<T> node = newNode();
consumerNode.lazySet(node);
producerNode.lazySet(node);
}
// 无锁入队操作(多生产者)
@Override
public boolean offer(T e) {
if (null == e) {
throw new NullPointerException();
}
final LinkedQueueNode<T> nextNode = newNode(e);
final LinkedQueueNode<T> prevProducerNode = producerNode.getAndSet(nextNode);
// 链接新节点 - 使用有序写保证可见性
prevProducerNode.soNext(nextNode);
return true;
}
// 批量入队操作
public int offerBatch(Collection<T> batch) {
if (batch.isEmpty()) {
return 0;
}
int count = 0;
LinkedQueueNode<T> firstNode = null;
LinkedQueueNode<T> lastNode = null;
// 批量创建节点
for (T item : batch) {
LinkedQueueNode<T> newNode = newNode(item);
if (firstNode == null) {
firstNode = newNode;
}
if (lastNode != null) {
lastNode.soNext(newNode);
}
lastNode = newNode;
count++;
}
// 原子更新生产者节点
final LinkedQueueNode<T> prevProducerNode = producerNode.getAndSet(lastNode);
prevProducerNode.soNext(firstNode);
return count;
}
// 出队操作(单消费者,无需同步)
@Override
public T poll() {
final LinkedQueueNode<T> currConsumerNode = consumerNode.get();
final LinkedQueueNode<T> nextNode = currConsumerNode.lvNext();
if (nextNode != null) {
final T nextValue = nextNode.getAndNullValue();
consumerNode.lazySet(nextNode);
currConsumerNode.soNext(null); // 帮助GC
return nextValue;
}
return null;
}
// 批量出队操作
public List<T> pollBatch(int maxBatchSize) {
List<T> batch = new ArrayList<>(maxBatchSize);
for (int i = 0; i < maxBatchSize; i++) {
T item = poll();
if (item == null) {
break;
}
batch.add(item);
}
return batch;
}
}
// Netty EventLoop中的批处理实现
public abstract class SingleThreadEventLoop extends SingleThreadEventExecutor implements EventLoop {
// 使用MPSC队列实现任务缓冲
private final Queue<Runnable> taskQueue;
// 批处理配置
private static final int MAX_BATCH_SIZE = 1024;
private static final int MAX_BATCH_TIME = 1000000; // 1ms in nanoseconds
public SingleThreadEventLoop(EventLoopGroup parent, Executor executor, boolean addTaskWakesUp) {
super(parent, executor, addTaskWakesUp);
this.taskQueue = newTaskQueue();
}
// 批处理任务执行
@Override
protected void run() {
for (;;) {
Runnable task = takeTask();
if (task != null) {
task.run();
updateLastExecutionTime();
}
// 批处理优化:连续处理多个任务
if (task != null) {
processBatchTasks();
}
if (confirmShutdown()) {
break;
}
}
}
// 批量处理任务
private void processBatchTasks() {
int processed = 0;
long startTime = System.nanoTime();
// 批量处理任务,直到达到批次大小或时间限制
while (processed < MAX_BATCH_SIZE) {
long currentTime = System.nanoTime();
if (currentTime - startTime > MAX_BATCH_TIME) {
break; // 超过时间限制,退出批处理
}
Runnable task = taskQueue.poll();
if (task == null) {
break; // 没有更多任务,退出批处理
}
safeExecute(task);
processed++;
}
if (processed > 0) {
// 更新统计信息
updateBatchStatistics(processed);
}
}
}
Netty的ByteBuf批处理优化
Netty在内存管理和数据处理中也大量使用了批处理技术。
// Netty ByteBuf批处理写入
public class ByteBufBatchWriter {
private final Channel channel; // 批量写出的目标Channel,performIO()中使用
private final ByteBuf buffer;
private final int maxBatchSize;
private int currentBatchSize;
public ByteBufBatchWriter(Channel channel, ByteBuf buffer, int maxBatchSize) {
this.channel = channel;
this.buffer = buffer;
this.maxBatchSize = maxBatchSize;
this.currentBatchSize = 0;
}
// 批量写入整数
public void writeIntBatch(int[] values) {
int remaining = values.length;
int offset = 0;
while (remaining > 0) {
int batchSize = Math.min(remaining, maxBatchSize - currentBatchSize);
// 批量写入
for (int i = 0; i < batchSize; i++) {
buffer.writeInt(values[offset + i]);
}
currentBatchSize += batchSize;
offset += batchSize;
remaining -= batchSize;
// 如果批次满了,执行刷新
if (currentBatchSize >= maxBatchSize) {
flush();
}
}
}
// 批量写入字节数组
public void writeBytesBatch(List<byte[]> byteArrays) {
// 计算总大小
int totalSize = byteArrays.stream().mapToInt(arr -> arr.length).sum();
// 确保缓冲区有足够的空间
buffer.ensureWritable(totalSize);
// 批量写入
for (byte[] arr : byteArrays) {
buffer.writeBytes(arr);
}
currentBatchSize += byteArrays.size();
// 检查是否需要刷新
if (currentBatchSize >= maxBatchSize) {
flush();
}
}
// 刷新缓冲区
public void flush() {
if (currentBatchSize > 0) {
// 执行实际的IO操作
performIO();
// 重置批次计数
currentBatchSize = 0;
// 清空缓冲区
buffer.clear();
}
}
private void performIO() {
// 执行批量IO操作
if (buffer.readableBytes() > 0) {
// 这里会调用底层的Channel进行批量写入
channel.writeAndFlush(buffer.slice());
}
}
}
// Netty ChannelPipeline中的批处理
public class BatchHandler extends ChannelDuplexHandler {
private final Queue<Object> writeBatch = new ArrayDeque<>();
private final int maxBatchSize;
private final long maxBatchTimeNanos;
private long lastFlushTime;
public BatchHandler(int maxBatchSize, long maxBatchTimeMillis) {
this.maxBatchSize = maxBatchSize;
this.maxBatchTimeNanos = TimeUnit.MILLISECONDS.toNanos(maxBatchTimeMillis);
this.lastFlushTime = System.nanoTime();
}
@Override
public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception {
// 添加到批次队列(示例未保留promise,实际实现应在批量写出完成后统一完成对应的promise)
writeBatch.offer(msg);
// 检查是否需要刷新
if (shouldFlush()) {
flushBatch(ctx);
}
}
@Override
public void flush(ChannelHandlerContext ctx) throws Exception {
flushBatch(ctx);
ctx.flush();
}
private boolean shouldFlush() {
// 基于批次大小
if (writeBatch.size() >= maxBatchSize) {
return true;
}
// 基于时间间隔
long currentTime = System.nanoTime();
if (currentTime - lastFlushTime >= maxBatchTimeNanos) {
return true;
}
return false;
}
private void flushBatch(ChannelHandlerContext ctx) {
if (writeBatch.isEmpty()) {
return;
}
List<Object> batch = new ArrayList<>();
Object msg;
// 收集批次数据
while ((msg = writeBatch.poll()) != null) {
batch.add(msg);
if (batch.size() >= maxBatchSize) {
break;
}
}
// 批量处理
if (!batch.isEmpty()) {
processBatch(ctx, batch);
lastFlushTime = System.nanoTime();
}
}
private void processBatch(ChannelHandlerContext ctx, List<Object> batch) {
// 批量处理逻辑
if (batch.size() == 1) {
// 单个消息直接处理
ctx.write(batch.get(0));
} else {
// 多个消息批量处理
BatchMessage batchMsg = new BatchMessage(batch);
ctx.write(batchMsg);
}
}
}
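上面的BatchHandler可以像普通Handler一样挂到ChannelPipeline中使用。下面给出一个启动示意(端口、批次大小与刷新间隔均为假设值):
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;

public class BatchServer {
    public static void main(String[] args) throws InterruptedException {
        NioEventLoopGroup boss = new NioEventLoopGroup(1);
        NioEventLoopGroup worker = new NioEventLoopGroup();
        ServerBootstrap bootstrap = new ServerBootstrap();
        bootstrap.group(boss, worker)
                .channel(NioServerSocketChannel.class)
                .childHandler(new ChannelInitializer<SocketChannel>() {
                    @Override
                    protected void initChannel(SocketChannel ch) {
                        // 最多攒64条消息、或距上次刷新超过5ms时触发一次批量写出
                        ch.pipeline().addLast(new BatchHandler(64, 5));
                    }
                });
        bootstrap.bind(8080).sync();
    }
}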
消息队列中的队列缓冲+批处理架构
消息队列架构设计
消息队列是队列缓冲+批处理架构的典型应用,通过消息缓冲和批量处理实现高性能。
Kafka的批处理架构
Apache Kafka在设计和实现中大量使用了批处理技术来提升性能。
// Kafka生产者批处理实现
public class KafkaBatchProducer {
private final KafkaProducer<String, Object> producer;
private final int batchSize;
private final long lingerMs;
public KafkaBatchProducer(Properties props, int batchSize, long lingerMs) {
// 配置批处理参数
props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs);
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "lz4");
props.put(ProducerConfig.ACKS_CONFIG, "1");
this.producer = new KafkaProducer<>(props);
this.batchSize = batchSize;
this.lingerMs = lingerMs;
}
// 批量发送消息
public void sendBatch(List<ProducerRecord<String, Object>> records) {
List<CompletableFuture<RecordMetadata>> futures = new ArrayList<>();
for (ProducerRecord<String, Object> record : records) {
CompletableFuture<RecordMetadata> future = new CompletableFuture<>();
producer.send(record, (metadata, exception) -> {
if (exception != null) {
future.completeExceptionally(exception);
} else {
future.complete(metadata);
}
});
futures.add(future);
}
// 等待所有消息发送完成(个别消息失败时不在此处抛出,由下面的统计逻辑处理)
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).exceptionally(ex -> null).join();
// 统计发送结果
long successCount = futures.stream()
.filter(f -> !f.isCompletedExceptionally())
.count();
log.info("批量发送完成: total={}, success={}", records.size(), successCount);
}
// 异步批量发送
public CompletableFuture<List<RecordMetadata>> sendBatchAsync(List<ProducerRecord<String, Object>> records) {
List<CompletableFuture<RecordMetadata>> futures = new ArrayList<>();
for (ProducerRecord<String, Object> record : records) {
CompletableFuture<RecordMetadata> future = new CompletableFuture<>();
producer.send(record, (metadata, exception) -> {
if (exception != null) {
future.completeExceptionally(exception);
} else {
future.complete(metadata);
}
});
futures.add(future);
}
// 组合所有future
return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
.thenApply(v -> futures.stream()
.map(CompletableFuture::join)
.collect(Collectors.toList()));
}
}
// Kafka消费者批处理实现
public class KafkaBatchConsumer {
private final KafkaConsumer<String, Object> consumer;
private final int maxPollRecords;
private final long maxPollIntervalMs;
public KafkaBatchConsumer(Properties props, Collection<String> topics, int maxPollRecords, long maxPollIntervalMs) {
// 配置批处理参数
props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, (int) maxPollIntervalMs);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
this.consumer = new KafkaConsumer<>(props);
this.maxPollRecords = maxPollRecords;
this.maxPollIntervalMs = maxPollIntervalMs;
// 先订阅目标主题,否则后续poll()会抛出IllegalStateException
this.consumer.subscribe(topics);
}
// 批量消费消息
public void consumeBatch() {
try {
while (true) {
// 批量拉取消息
ConsumerRecords<String, Object> records = consumer.poll(Duration.ofMillis(100));
if (records.isEmpty()) {
continue;
}
log.info("拉取到消息批次: count={}", records.count());
// 按分区组织消息
Map<TopicPartition, List<ConsumerRecord<String, Object>>> partitionRecords =
new HashMap<>();
for (ConsumerRecord<String, Object> record : records) {
TopicPartition partition = new TopicPartition(record.topic(), record.partition());
partitionRecords.computeIfAbsent(partition, k -> new ArrayList<>())
.add(record);
}
// 并行处理各分区的消息批次
List<CompletableFuture<Void>> futures = new ArrayList<>();
for (Map.Entry<TopicPartition, List<ConsumerRecord<String, Object>>> entry :
partitionRecords.entrySet()) {
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
processBatch(entry.getKey(), entry.getValue());
});
futures.add(future);
}
// 等待所有批次处理完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
// 批量提交偏移量
commitOffsets(partitionRecords);
log.info("消息批次处理完成: total={}", records.count());
}
} catch (Exception e) {
log.error("批量消费异常", e);
} finally {
consumer.close();
}
}
// 批量处理消息
private void processBatch(TopicPartition partition,
List<ConsumerRecord<String, Object>> records) {
if (records.isEmpty()) {
return;
}
log.info("处理消息批次: partition={}, count={}", partition, records.size());
try {
// 1. 数据预处理
List<ProcessedRecord> processedRecords = preprocessRecords(records);
// 2. 批量业务处理
BatchProcessResult result = batchProcess(processedRecords);
// 3. 结果后处理
postprocessResults(result);
// 4. 记录处理偏移量
long lastOffset = records.get(records.size() - 1).offset();
recordProcessedOffset(partition, lastOffset);
log.info("消息批次处理成功: partition={}, count={}, lastOffset={}",
partition, records.size(), lastOffset);
} catch (Exception e) {
log.error("消息批次处理失败: partition={}, count={}",
partition, records.size(), e);
// 处理失败的消息
handleBatchFailure(partition, records, e);
}
}
// 批量提交偏移量
private void commitOffsets(Map<TopicPartition, List<ConsumerRecord<String, Object>>>
partitionRecords) {
Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
for (Map.Entry<TopicPartition, List<ConsumerRecord<String, Object>>> entry :
partitionRecords.entrySet()) {
TopicPartition partition = entry.getKey();
List<ConsumerRecord<String, Object>> records = entry.getValue();
if (!records.isEmpty()) {
// 提交最后一条消息的下一个偏移量
long lastOffset = records.get(records.size() - 1).offset();
offsets.put(partition, new OffsetAndMetadata(lastOffset + 1));
}
}
// 批量提交
consumer.commitSync(offsets);
log.info("批量提交偏移量完成: partitions={}", offsets.size());
}
}
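配合上面的构造函数,一个可能的启动方式如下(bootstrap地址、消费组与主题名均为假设值):
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;

public class BatchConsumerBootstrap {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "order-batch-consumer");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        // 订阅假设的主题order-events,每次poll最多拉取500条,两次poll之间最长允许5分钟处理时间
        KafkaBatchConsumer batchConsumer =
                new KafkaBatchConsumer(props, Collections.singletonList("order-events"), 500, 300_000L);
        batchConsumer.consumeBatch();
    }
}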
RabbitMQ的批处理优化
RabbitMQ也提供了多种批处理机制来提升消息处理性能。
// RabbitMQ批处理生产者
public class RabbitBatchProducer {
private final RabbitTemplate rabbitTemplate;
private final int batchSize;
private final long batchTimeout;
// 批处理缓冲区
private final List<Message> batchBuffer = new ArrayList<>();
private long lastBatchTime = System.currentTimeMillis();
public RabbitBatchProducer(RabbitTemplate rabbitTemplate, int batchSize, long batchTimeout) {
this.rabbitTemplate = rabbitTemplate;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
// 启动批处理定时器
startBatchTimer();
}
// 发送单条消息(可能触发批处理)
public void sendMessage(String exchange, String routingKey, Object payload) {
Message message = MessageBuilder.withBody(serialize(payload))
.setContentType("application/json")
.setMessageId(UUID.randomUUID().toString())
.setTimestamp(new Date())
.build();
synchronized (batchBuffer) {
batchBuffer.add(message);
// 检查是否需要立即发送批次
if (batchBuffer.size() >= batchSize) {
flushBatch(exchange, routingKey);
}
}
}
// 批量发送消息
public void sendBatch(String exchange, String routingKey, List<Object> payloads) {
List<Message> messages = payloads.stream()
.map(payload -> MessageBuilder.withBody(serialize(payload))
.setContentType("application/json")
.setMessageId(UUID.randomUUID().toString())
.setTimestamp(new Date())
.build())
.collect(Collectors.toList());
// 将大批次拆分为多个小批次
List<List<Message>> batches = splitIntoBatches(messages, batchSize);
for (List<Message> batch : batches) {
sendBatchMessage(exchange, routingKey, batch);
}
}
// 发送批量消息
private void sendBatchMessage(String exchange, String routingKey, List<Message> messages) {
if (messages.isEmpty()) {
return;
}
// 创建批量消息
BatchMessage batchMessage = new BatchMessage(messages);
Message batchMsg = MessageBuilder.withBody(serialize(batchMessage))
.setContentType("application/x-java-serialized-batch")
.setHeader("X-Batch-Size", messages.size())
.setHeader("X-Batch-Id", UUID.randomUUID().toString())
.build();
rabbitTemplate.send(exchange, routingKey, batchMsg);
log.info("批量消息发送完成: exchange={}, routingKey={}, count={}",
exchange, routingKey, messages.size());
}
// 定时批处理
private void startBatchTimer() {
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
synchronized (batchBuffer) {
long currentTime = System.currentTimeMillis();
// 检查是否超时需要发送
if (!batchBuffer.isEmpty() &&
(currentTime - lastBatchTime >= batchTimeout ||
batchBuffer.size() >= batchSize)) {
flushBatch("batch.exchange", "batch.routing.key");
}
}
}, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS);
}
// 刷新批次
private void flushBatch(String exchange, String routingKey) {
if (batchBuffer.isEmpty()) {
return;
}
List<Message> batch = new ArrayList<>(batchBuffer);
batchBuffer.clear();
lastBatchTime = System.currentTimeMillis();
sendBatchMessage(exchange, routingKey, batch);
}
}
// RabbitMQ批处理消费者
public class RabbitBatchConsumer {
private final RabbitTemplate rabbitTemplate;
private final int batchSize;
private final long batchTimeout;
public RabbitBatchConsumer(RabbitTemplate rabbitTemplate, int batchSize, long batchTimeout) {
this.rabbitTemplate = rabbitTemplate;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
}
// 批量监听消息
@RabbitListener(queues = "batch.queue", containerFactory = "batchContainerFactory")
public void handleBatchMessages(List<Message> messages) {
if (messages.isEmpty()) {
return;
}
log.info("批量接收消息: count={}", messages.size());
try {
// 1. 消息预处理
List<ProcessedMessage> processedMessages = preprocessMessages(messages);
// 2. 批量业务处理
BatchProcessResult result = processMessageBatch(processedMessages);
// 3. 批量确认
acknowledgeBatchMessages(messages, result);
log.info("批量消息处理完成: total={}, success={}, failed={}",
messages.size(), result.getSuccessCount(), result.getFailureCount());
} catch (Exception e) {
log.error("批量消息处理异常", e);
// 处理失败的消息
handleBatchFailure(messages, e);
}
}
// 批量处理消息
private BatchProcessResult processMessageBatch(List<ProcessedMessage> messages) {
BatchProcessResult result = new BatchProcessResult();
// 按消息类型分组
Map<String, List<ProcessedMessage>> groupedMessages = messages.stream()
.collect(Collectors.groupingBy(ProcessedMessage::getMessageType));
// 并行处理不同类型的消息
List<CompletableFuture<Void>> futures = new ArrayList<>();
for (Map.Entry<String, List<ProcessedMessage>> entry : groupedMessages.entrySet()) {
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
processMessageGroup(entry.getKey(), entry.getValue(), result);
});
futures.add(future);
}
// 等待所有处理完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
return result;
}
// 处理消息分组
private void processMessageGroup(String messageType, List<ProcessedMessage> messages,
BatchProcessResult result) {
try {
switch (messageType) {
case "ORDER_CREATED":
processOrderCreatedBatch(messages, result);
break;
case "INVENTORY_DEDUCTION":
processInventoryDeductionBatch(messages, result);
break;
case "PAYMENT_COMPLETED":
processPaymentCompletedBatch(messages, result);
break;
default:
log.warn("未知消息类型: {}", messageType);
result.addFailures(messages.size());
}
} catch (Exception e) {
log.error("消息分组处理失败: type={}, count={}", messageType, messages.size(), e);
result.addFailures(messages.size());
}
}
}
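上面@RabbitListener中引用的batchContainerFactory需要开启批量投递能力。以下是基于Spring AMQP(假设2.2及以上版本)的一种可能配置示意,批次大小与等待时间均为示例值:
import org.springframework.amqp.rabbit.config.SimpleRabbitListenerContainerFactory;
import org.springframework.amqp.rabbit.connection.ConnectionFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class RabbitBatchConfig {

    @Bean
    public SimpleRabbitListenerContainerFactory batchContainerFactory(ConnectionFactory connectionFactory) {
        SimpleRabbitListenerContainerFactory factory = new SimpleRabbitListenerContainerFactory();
        factory.setConnectionFactory(connectionFactory);
        factory.setBatchListener(true);        // 监听方法以List形式接收一批消息
        factory.setConsumerBatchEnabled(true); // 消费端按数量/超时把多条消息聚合成一批
        factory.setBatchSize(50);              // 每批最多50条
        factory.setReceiveTimeout(1000L);      // 凑不满一批时最多等待1秒
        return factory;
    }
}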
数据库写入中的队列缓冲+批处理架构
数据库批处理架构设计
数据库写入场景是队列缓冲+批处理架构的另一个重要应用领域。
数据库批处理实现
// 数据库批处理写入器
public class DatabaseBatchWriter {
private final DataSource dataSource;
private final int batchSize;
private final long batchTimeout;
private final ScheduledExecutorService scheduler;
// 批处理缓冲区
private final ConcurrentHashMap<String, BatchBuffer> batchBuffers = new ConcurrentHashMap<>();
public DatabaseBatchWriter(DataSource dataSource, int batchSize, long batchTimeout) {
this.dataSource = dataSource;
this.batchSize = batchSize;
this.batchTimeout = batchTimeout;
this.scheduler = Executors.newScheduledThreadPool(2);
// 启动批处理定时器
startBatchProcessors();
}
// 异步写入数据
public CompletableFuture<Void> writeAsync(String table, Map<String, Object> data) {
CompletableFuture<Void> future = new CompletableFuture<>();
BatchBuffer buffer = batchBuffers.computeIfAbsent(table,
k -> new BatchBuffer(table, batchSize));
buffer.add(data, future);
return future;
}
// 批量插入数据
public int batchInsert(String table, List<Map<String, Object>> records) {
if (records.isEmpty()) {
return 0;
}
String sql = buildBatchInsertSql(table, records.get(0).keySet());
try (Connection conn = dataSource.getConnection();
PreparedStatement pstmt = conn.prepareStatement(sql)) {
conn.setAutoCommit(false);
int count = 0;
for (Map<String, Object> record : records) {
setParameters(pstmt, record);
pstmt.addBatch();
count++;
// 每1000条执行一次批处理
if (count % 1000 == 0) {
int[] results = pstmt.executeBatch();
conn.commit();
log.info("批量插入进度: table={}, count={}", table, count);
}
}
// 执行剩余的批处理
if (count % 1000 != 0) {
int[] results = pstmt.executeBatch();
conn.commit();
}
log.info("批量插入完成: table={}, total={}", table, count);
return count;
} catch (SQLException e) {
log.error("批量插入失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("批量插入失败", e);
}
}
// 批量更新数据
public int batchUpdate(String table, List<Map<String, Object>> records, String keyColumn) {
if (records.isEmpty()) {
return 0;
}
String sql = buildBatchUpdateSql(table, records.get(0).keySet(), keyColumn);
try (Connection conn = dataSource.getConnection();
PreparedStatement pstmt = conn.prepareStatement(sql)) {
conn.setAutoCommit(false);
int count = 0;
for (Map<String, Object> record : records) {
setUpdateParameters(pstmt, record, keyColumn);
pstmt.addBatch();
count++;
// 每500条执行一次批处理
if (count % 500 == 0) {
int[] results = pstmt.executeBatch();
conn.commit();
log.info("批量更新进度: table={}, count={}", table, count);
}
}
// 执行剩余的批处理
if (count % 500 != 0) {
int[] results = pstmt.executeBatch();
conn.commit();
}
log.info("批量更新完成: table={}, total={}", table, count);
return count;
} catch (SQLException e) {
log.error("批量更新失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("批量更新失败", e);
}
}
// 构建批量插入SQL
private String buildBatchInsertSql(String table, Set<String> columns) {
StringBuilder sql = new StringBuilder("INSERT INTO ").append(table).append(" (");
// 列名
String columnNames = String.join(", ", columns);
sql.append(columnNames).append(") VALUES (");
// 占位符
String placeholders = columns.stream()
.map(c -> "?")
.collect(Collectors.joining(", "));
sql.append(placeholders).append(")");
return sql.toString();
}
// 构建批量更新SQL
private String buildBatchUpdateSql(String table, Set<String> columns, String keyColumn) {
StringBuilder sql = new StringBuilder("UPDATE ").append(table).append(" SET ");
// 更新列
String updateColumns = columns.stream()
.filter(col -> !col.equals(keyColumn))
.map(col -> col + " = ?")
.collect(Collectors.joining(", "));
sql.append(updateColumns);
// WHERE条件
sql.append(" WHERE ").append(keyColumn).append(" = ?");
return sql.toString();
}
// 启动批处理器
private void startBatchProcessors() {
// 定时批处理
scheduler.scheduleAtFixedRate(() -> {
processTimeoutBatches();
}, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS);
// 监控批处理缓冲区
scheduler.scheduleAtFixedRate(() -> {
monitorBatchBuffers();
}, 30, 30, TimeUnit.SECONDS);
}
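// startBatchProcessors中定时调用:刷新所有已满或已超时的缓冲区(一种可能的实现示意)
private void processTimeoutBatches() {
for (BatchBuffer buffer : batchBuffers.values()) {
if (buffer.shouldFlush()) {
buffer.flush();
}
}
}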
// 批处理缓冲区
private class BatchBuffer {
private final String table;
private final int maxSize;
private final List<Map<String, Object>> buffer = new ArrayList<>();
private final List<CompletableFuture<Void>> futures = new ArrayList<>();
private long lastFlushTime = System.currentTimeMillis();
public BatchBuffer(String table, int maxSize) {
this.table = table;
this.maxSize = maxSize;
}
public synchronized void add(Map<String, Object> data, CompletableFuture<Void> future) {
buffer.add(data);
futures.add(future);
// 检查是否需要立即刷新
if (buffer.size() >= maxSize) {
flush();
}
}
public synchronized void flush() {
if (buffer.isEmpty()) {
return;
}
List<Map<String, Object>> currentBuffer = new ArrayList<>(buffer);
List<CompletableFuture<Void>> currentFutures = new ArrayList<>(futures);
buffer.clear();
futures.clear();
lastFlushTime = System.currentTimeMillis();
// 异步执行批处理
CompletableFuture.runAsync(() -> {
try {
int count = batchInsert(table, currentBuffer);
log.info("批处理完成: table={}, count={}", table, count);
// 完成所有future
currentFutures.forEach(future -> future.complete(null));
} catch (Exception e) {
log.error("批处理失败: table={}, count={}", table, currentBuffer.size(), e);
// 异常完成所有future
currentFutures.forEach(future -> future.completeExceptionally(e));
}
});
}
public synchronized boolean shouldFlush() {
return !buffer.isEmpty() &&
(buffer.size() >= maxSize ||
System.currentTimeMillis() - lastFlushTime >= batchTimeout);
}
}
}
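下面给出一种可能的使用方式:业务代码只调用writeAsync写单条记录,攒批与批量落库由DatabaseBatchWriter在后台完成(表名、字段名均为假设值):
import java.util.HashMap;
import java.util.Map;
import javax.sql.DataSource;

public class OrderWriteService {
    private final DatabaseBatchWriter writer;

    public OrderWriteService(DataSource dataSource) {
        // 每攒满500条或每200ms刷一次
        this.writer = new DatabaseBatchWriter(dataSource, 500, 200);
    }

    public void saveOrder(long orderId, long userId, String status) {
        Map<String, Object> row = new HashMap<>();
        row.put("order_id", orderId);
        row.put("user_id", userId);
        row.put("status", status);
        writer.writeAsync("t_order", row)
              .whenComplete((v, e) -> {
                  if (e != null) {
                      e.printStackTrace(); // 示意:实际应记录日志并触发告警
                  }
              });
    }
}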
数据库连接池批处理优化
// 数据库连接池批处理优化
public class PooledBatchProcessor {
private final HikariDataSource dataSource;
private final ThreadLocal<BatchContext> batchContext = new ThreadLocal<>();
public PooledBatchProcessor(HikariConfig config) {
// 优化连接池配置
config.setMaximumPoolSize(50);
config.setMinimumIdle(10);
config.setConnectionTimeout(30000);
config.setIdleTimeout(600000);
config.setMaxLifetime(1800000);
config.setLeakDetectionThreshold(60000);
// 批处理优化
config.addDataSourceProperty("cachePrepStmts", "true");
config.addDataSourceProperty("prepStmtCacheSize", "250");
config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048");
config.addDataSourceProperty("useServerPrepStmts", "true");
config.addDataSourceProperty("rewriteBatchedStatements", "true");
this.dataSource = new HikariDataSource(config);
}
// 批处理上下文
private class BatchContext {
private final Connection connection;
private final Map<String, PreparedStatement> statements = new HashMap<>();
private final Map<String, Integer> batchCounts = new HashMap<>();
private final int batchSize;
public BatchContext(Connection connection, int batchSize) throws SQLException {
this.connection = connection;
this.batchSize = batchSize;
this.connection.setAutoCommit(false);
}
public void addBatch(String sql, Object... parameters) throws SQLException {
PreparedStatement pstmt = statements.computeIfAbsent(sql, k -> {
try {
return connection.prepareStatement(k);
} catch (SQLException e) {
throw new RuntimeException(e);
}
});
// 设置参数
for (int i = 0; i < parameters.length; i++) {
pstmt.setObject(i + 1, parameters[i]);
}
pstmt.addBatch();
// 更新计数
int count = batchCounts.getOrDefault(sql, 0) + 1;
batchCounts.put(sql, count);
// 检查是否需要执行批处理
if (count >= batchSize) {
executeBatch(sql);
}
}
public void executeBatch(String sql) throws SQLException {
PreparedStatement pstmt = statements.get(sql);
if (pstmt != null) {
int[] results = pstmt.executeBatch();
connection.commit();
batchCounts.put(sql, 0);
log.info("批处理执行完成: sql={}, count={}", sql, results.length);
}
}
public void executeAllBatches() throws SQLException {
for (String sql : statements.keySet()) {
executeBatch(sql);
}
}
public void close() {
try {
executeAllBatches();
connection.commit();
} catch (SQLException e) {
try {
connection.rollback();
} catch (SQLException ex) {
log.error("回滚失败", ex);
}
} finally {
try {
for (PreparedStatement pstmt : statements.values()) {
pstmt.close();
}
connection.close();
} catch (SQLException e) {
log.error("关闭连接失败", e);
}
}
}
}
// 开始批处理会话
public BatchSession beginBatchSession(int batchSize) {
try {
Connection connection = dataSource.getConnection();
BatchContext context = new BatchContext(connection, batchSize);
batchContext.set(context);
return new BatchSession(context);
} catch (SQLException e) {
throw new DatabaseException("开始批处理会话失败", e);
}
}
// 批处理会话
public class BatchSession implements AutoCloseable {
private final BatchContext context;
public BatchSession(BatchContext context) {
this.context = context;
}
public void addBatch(String sql, Object... parameters) {
try {
context.addBatch(sql, parameters);
} catch (SQLException e) {
throw new DatabaseException("添加批处理失败", e);
}
}
public void executeBatch(String sql) {
try {
context.executeBatch(sql);
} catch (SQLException e) {
throw new DatabaseException("执行批处理失败", e);
}
}
public void commit() {
try {
context.executeAllBatches();
} catch (SQLException e) {
throw new DatabaseException("提交批处理失败", e);
}
}
@Override
public void close() {
context.close();
batchContext.remove();
}
}
// 批量插入优化
public int optimizedBatchInsert(String table, List<Map<String, Object>> records) {
if (records.isEmpty()) {
return 0;
}
try (BatchSession session = beginBatchSession(1000)) {
// 分批处理
for (int i = 0; i < records.size(); i += 1000) {
int end = Math.min(i + 1000, records.size());
List<Map<String, Object>> batch = records.subList(i, end);
// 按实际批次大小构建多值插入SQL,避免最后一批占位符与参数数量不一致
String sql = buildOptimizedInsertSql(table, records.get(0).keySet(), batch.size());
// 为每批构建参数
Object[] params = buildBatchParameters(batch);
session.addBatch(sql, params);
}
session.commit();
log.info("优化批量插入完成: table={}, count={}", table, records.size());
return records.size();
} catch (Exception e) {
log.error("优化批量插入失败: table={}, count={}", table, records.size(), e);
throw new DatabaseException("优化批量插入失败", e);
}
}
// 构建优化的批量插入SQL(MySQL多值插入)
private String buildOptimizedInsertSql(String table, Set<String> columns, int batchSize) {
StringBuilder sql = new StringBuilder("INSERT INTO ").append(table).append(" (");
// 列名
String columnNames = String.join(", ", columns);
sql.append(columnNames).append(") VALUES ");
// 多值插入
String singleValue = "(" + columns.stream().map(c -> "?").collect(Collectors.joining(", ")) + ")";
String allValues = IntStream.range(0, batchSize)
.mapToObj(i -> singleValue)
.collect(Collectors.joining(", "));
sql.append(allValues);
return sql.toString();
}
}
队列缓冲+批处理架构最佳实践
1. 设计原则
// 队列缓冲+批处理设计原则
public class BatchProcessingPrinciples {
/**
* 原则1:合适的批次大小
*/
public void demonstrateBatchSizeOptimization() {
// 不好的做法:批次过大
class OversizedBatch {
private static final int BATCH_SIZE = 10000; // 太大
public void processBatch(List<Data> items) {
// 可能导致内存溢出和长时间阻塞
}
}
// 好的做法:根据场景选择合适批次大小
class OptimizedBatch {
// 网络请求:小批次,减少延迟
private static final int NETWORK_BATCH_SIZE = 10;
// 数据库操作:中等批次,平衡性能和内存
private static final int DATABASE_BATCH_SIZE = 100;
// 文件处理:大批次,提高吞吐量
private static final int FILE_BATCH_SIZE = 1000;
// 动态批次大小调整
public int calculateOptimalBatchSize(int availableMemory,
int itemSize,
int processingTime) {
// 基于可用内存和项目大小计算
int memoryBased = availableMemory / (itemSize * 2);
// 基于处理时间调整
int timeBased = processingTime < 100 ? 100 : 50;
return Math.min(memoryBased, timeBased);
}
}
}
/**
* 原则2:超时机制
*/
public void demonstrateTimeoutMechanism() {
// 不好的做法:没有超时机制
class NoTimeoutBatch {
private final List<Item> buffer = new ArrayList<>();
public void add(Item item) {
buffer.add(item);
if (buffer.size() >= 100) {
flush();
}
// 问题:如果数据量小,可能长时间不刷新
}
}
// 好的做法:双重触发机制
class TimeoutBatch {
private final List<Item> buffer = new ArrayList<>();
private final int batchSize;
private final long timeoutMillis;
private long lastFlushTime = System.currentTimeMillis();
public TimeoutBatch(int batchSize, long timeoutMillis) {
this.batchSize = batchSize;
this.timeoutMillis = timeoutMillis;
// 启动定时刷新
startPeriodicFlush();
}
public void add(Item item) {
synchronized (buffer) {
buffer.add(item);
// 检查是否需要立即刷新
if (shouldFlush()) {
flush();
}
}
}
private boolean shouldFlush() {
return buffer.size() >= batchSize ||
System.currentTimeMillis() - lastFlushTime >= timeoutMillis;
}
private void startPeriodicFlush() {
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
synchronized (buffer) {
if (!buffer.isEmpty() && shouldFlush()) {
flush();
}
}
}, timeoutMillis, timeoutMillis, TimeUnit.MILLISECONDS);
}
}
}
/**
* 原则3:错误处理和重试
*/
public void demonstrateErrorHandling() {
// 不好的做法:简单的错误处理
class SimpleErrorHandling {
public void processBatch(List<Item> items) {
try {
// 批处理逻辑
database.batchInsert(items);
} catch (Exception e) {
log.error("批处理失败", e);
// 问题:没有重试机制,数据丢失
}
}
}
// 好的做法:完善的错误处理
class RobustErrorHandling {
private static final int MAX_RETRY_ATTEMPTS = 3;
private static final long RETRY_DELAY_MS = 1000;
public void processBatch(List<Item> items) {
int attempt = 0;
while (attempt < MAX_RETRY_ATTEMPTS) {
try {
// 尝试批处理
database.batchInsert(items);
return; // 成功,退出
} catch (Exception e) {
attempt++;
log.error("批处理失败,尝试次数: {}", attempt, e);
if (attempt >= MAX_RETRY_ATTEMPTS) {
// 最终失败,处理失败数据
handleFailedItems(items, e);
return;
}
// 等待重试
try {
Thread.sleep(RETRY_DELAY_MS * attempt);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
}
}
private void handleFailedItems(List<Item> items, Exception e) {
// 1. 记录失败日志
logFailedItems(items, e);
// 2. 发送到死信队列
sendToDeadLetterQueue(items);
// 3. 发送告警
sendAlert(items.size(), e);
}
}
}
/**
* 原则4:背压处理
*/
public void demonstrateBackpressureHandling() {
// 不好的做法:无限制的缓冲
class UnboundedBuffer {
private final Queue<Item> queue = new ConcurrentLinkedQueue<>();
public void produce(Item item) {
queue.offer(item); // 可能导致内存溢出
}
}
// 好的做法:有界缓冲和背压
class BackpressureBuffer {
private final BlockingQueue<Item> queue;
private final int capacity;
private final AtomicInteger droppedCount = new AtomicInteger(0);
public BackpressureBuffer(int capacity) {
this.capacity = capacity;
this.queue = new ArrayBlockingQueue<>(capacity);
}
public boolean produce(Item item, long timeout, TimeUnit unit) {
try {
// 尝试放入队列,带超时
boolean offered = queue.offer(item, timeout, unit);
if (!offered) {
// 队列满,执行背压策略
return handleBackpressure(item);
}
return true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return false;
}
}
private boolean handleBackpressure(Item item) {
// 背压策略1:丢弃最老的数据
Item dropped = queue.poll();
if (dropped != null) {
queue.offer(item);
droppedCount.incrementAndGet();
log.warn("背压:丢弃数据,总计丢弃: {}", droppedCount.get());
return true;
}
// 背压策略2:丢弃当前数据
droppedCount.incrementAndGet();
log.warn("背压:丢弃新数据,总计丢弃: {}", droppedCount.get());
return false;
}
}
}
}
2. 性能调优建议
# 队列缓冲+批处理性能调优配置
performance_tuning:
# 批次大小配置
batch_sizes:
network_requests: 10 # 网络请求批次大小
database_operations: 100 # 数据库操作批次大小
file_operations: 1000 # 文件操作批次大小
memory_operations: 10000 # 内存操作批次大小
# 超时配置
timeouts:
batch_timeout_ms: 100 # 批次超时时间
flush_interval_ms: 50 # 刷新间隔
retry_delay_ms: 1000 # 重试延迟
max_retry_attempts: 3 # 最大重试次数
# 内存配置
memory:
max_buffer_size: 100000 # 最大缓冲区大小
buffer_cleanup_threshold: 0.8 # 缓冲区清理阈值
object_pool_size: 1000 # 对象池大小
# 线程池配置
thread_pool:
core_pool_size: 10 # 核心线程数
max_pool_size: 50 # 最大线程数
queue_capacity: 1000 # 队列容量
keep_alive_seconds: 60 # 线程存活时间
# JVM优化
jvm:
heap_size: "4g" # 堆内存大小
gc_type: "G1GC" # 垃圾收集器
gc_max_pause: 200 # GC最大暂停时间
heap_regions_size: "16m" # 堆区域大小
# 数据库批处理优化
database_optimization:
# 连接池配置
hikari:
maximum_pool_size: 50 # 最大连接数
minimum_idle: 10 # 最小空闲连接
connection_timeout: 30000 # 连接超时
idle_timeout: 600000 # 空闲超时
max_lifetime: 1800000 # 最大生命周期
# MySQL优化
mysql:
innodb_buffer_pool_size: "8G" # InnoDB缓冲池
innodb_log_file_size: "2G" # InnoDB日志文件大小
innodb_flush_log_at_trx_commit: 2 # 事务提交刷新策略
query_cache_type: 0 # 查询缓存类型
# PostgreSQL优化
postgresql:
shared_buffers: "2GB" # 共享缓冲区
work_mem: "64MB" # 工作内存
maintenance_work_mem: "256MB" # 维护工作内存
effective_cache_size: "6GB" # 有效缓存大小
# 消息队列批处理优化
message_queue_optimization:
# Kafka配置
kafka:
batch_size: 16384 # 批处理大小
linger_ms: 10 # 延迟发送时间
compression_type: lz4 # 压缩算法
max_request_size: 10485760 # 最大请求大小
buffer_memory: 33554432 # 缓冲区内存
# RabbitMQ配置
rabbitmq:
prefetch_count: 100 # 预取数量
consumer_batch_size: 50 # 消费者批次大小
channel_cache_size: 25 # 通道缓存大小
connection_cache_size: 5 # 连接缓存大小
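上述参数最终要映射到具体代码或组件配置上才会生效。以thread_pool一节为例,对应的线程池构造大致如下(示意,拒绝策略按背压需要选择):
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BatchExecutorConfig {
    // 对应上面 thread_pool 配置:core=10, max=50, queue=1000, keepAlive=60s
    public static ThreadPoolExecutor newBatchExecutor() {
        return new ThreadPoolExecutor(
                10, 50,
                60, TimeUnit.SECONDS,
                new ArrayBlockingQueue<>(1000),
                // 队列打满时由提交线程自己执行任务,对上游形成天然背压,避免无界堆积
                new ThreadPoolExecutor.CallerRunsPolicy());
    }
}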
3. 监控告警配置
# Prometheus队列缓冲+批处理监控配置
groups:
- name: batch_processing_monitoring
rules:
# 批处理队列积压告警
- alert: BatchQueueBacklogHigh
expr: batch_queue_size > 10000
for: 5m
labels:
severity: warning
annotations:
summary: "批处理队列积压严重"
description: "批处理队列 {{ $labels.queue }} 积压数据量 {{ $value }}"
# 批处理延迟告警
- alert: BatchProcessingLatencyHigh
expr: batch_processing_duration_seconds{quantile="0.95"} > 30
for: 3m
labels:
severity: warning
annotations:
summary: "批处理延迟过高"
description: "批处理95分位延迟 {{ $value }}秒"
# 批处理失败率告警
- alert: BatchProcessingFailureRateHigh
expr: rate(batch_processing_failures_total[5m]) / rate(batch_processing_total[5m]) > 0.05
for: 2m
labels:
severity: critical
annotations:
summary: "批处理失败率过高"
description: "批处理失败率 {{ $value | humanizePercentage }}"
# 批处理缓冲区使用率告警
- alert: BatchBufferUsageHigh
expr: batch_buffer_used_bytes / batch_buffer_total_bytes > 0.9
for: 1m
labels:
severity: warning
annotations:
summary: "批处理缓冲区使用率过高"
description: "批处理缓冲区使用率 {{ $value | humanizePercentage }}"
# 批处理重试次数告警
- alert: BatchRetryCountHigh
expr: batch_retry_count > 5
for: 1m
labels:
severity: warning
annotations:
summary: "批处理重试次数过多"
description: "批处理重试次数 {{ $value }}"
# 批处理吞吐量告警
- alert: BatchThroughputLow
expr: rate(batch_processing_total[5m]) < 10
for: 5m
labels:
severity: warning
annotations:
summary: "批处理吞吐量过低"
description: "批处理吞吐量 {{ $value }}/秒"
# 批处理内存使用告警
- alert: BatchMemoryUsageHigh
expr: batch_memory_used_bytes / batch_memory_max_bytes > 0.8
for: 3m
labels:
severity: critical
annotations:
summary: "批处理内存使用率过高"
description: "批处理内存使用率 {{ $value | humanizePercentage }}"
# 批处理线程池饱和告警
- alert: BatchThreadPoolSaturated
expr: batch_thread_pool_active_threads / batch_thread_pool_max_threads > 0.9
for: 2m
labels:
severity: warning
annotations:
summary: "批处理线程池饱和"
description: "批处理线程池使用率 {{ $value | humanizePercentage }}"
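这些告警规则依赖应用侧先把相应指标暴露出来。下面是一个基于Micrometer的示意(假设引入micrometer-registry-prometheus依赖,指标名与标签沿用上面的规则,队列名为假设值):
import io.micrometer.core.instrument.Gauge;
import io.micrometer.prometheus.PrometheusConfig;
import io.micrometer.prometheus.PrometheusMeterRegistry;
import java.util.concurrent.LinkedBlockingQueue;

public class BatchMetrics {
    public static void main(String[] args) {
        PrometheusMeterRegistry registry = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
        LinkedBlockingQueue<Runnable> batchQueue = new LinkedBlockingQueue<>();

        // 暴露告警规则中引用的 batch_queue_size 指标,按队列名打标签
        Gauge.builder("batch_queue_size", batchQueue, LinkedBlockingQueue::size)
                .tag("queue", "order-write")
                .register(registry);

        // registry.scrape() 的文本输出挂到 /metrics 端点即可被 Prometheus 抓取
        System.out.println(registry.scrape());
    }
}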
队列缓冲+批处理架构演进路径
总结
队列缓冲+批处理架构法则是现代高并发系统设计的核心原则之一。通过深入理解队列缓冲的流量调节作用和批处理的性能优化效果,我们能够为不同的业务场景设计出最适合的架构方案,实现系统性能、稳定性和可扩展性的最佳平衡。
核心原则
- 流量缓冲:通过队列缓冲瞬时流量,保护系统免受过载冲击
- 批量优化:减少单次处理开销,提升整体系统吞吐量
- 异步解耦:降低系统组件间的耦合度,提高可维护性
- 动态调整:根据系统负载动态调整批次大小和处理策略
关键技术
- 队列缓冲:MPSC队列、阻塞队列、优先级队列等缓冲机制
- 批处理算法:批量插入、批量更新、批量删除等优化技术
- 异步处理:事件驱动、消息队列、回调机制等异步模式
- 性能监控:实时监控、动态调优、自适应调整等智能化手段
成功要素
- 合理的批次设计:根据业务特点选择合适的批次大小和超时策略
- 完善的错误处理:建立重试机制和失败数据处理流程
- 有效的背压控制:防止系统过载,保证稳定性
- 持续的性能优化:基于监控数据持续调优系统参数
- 容量规划:提前规划系统容量,支持业务增长
队列缓冲+批处理架构不是简单的技术堆砌,而是需要根据业务特征、性能要求、系统复杂度等因素,设计出最适合的缓冲和批处理策略。