架构之海量结构化数据扩展
引言
在数字化转型的浪潮中,数据规模呈现出爆炸式增长。面对从亿级到百亿级的库表规模,传统的单体数据库架构已经无法满足现代应用对性能、可用性和扩展性的要求。海量结构化数据扩展架构法则强调:当数据规模达到亿级记录以上时,必须采用分布式数据库架构、数据分片策略、读写分离机制和缓存层设计,通过水平扩展而非垂直扩展来支撑海量数据的存储和访问需求。
这一法则不仅关乎技术选型,更是架构设计的核心理念转变——从"更大更强的机器"转向"更多更智能的节点",从集中式架构转向分布式架构,从单点故障转向高可用集群。
海量数据扩展的核心理念
为什么需要海量数据扩展架构?
单体数据库在海量数据下面临性能、可用性和扩展性方面的挑战,海量数据扩展架构能够有效解决这些挑战:
- 线性扩展能力:通过增加节点实现性能和容量的线性增长
- 高可用保障:多副本机制确保单点故障不影响业务
- 性能优化:读写分离、缓存层设计提升系统响应速度
- 成本效益:使用普通硬件构建分布式集群,降低总体成本
- 灵活架构:支持多种数据分片策略和访问模式
数据规模分级与架构演进
| 数据规模 | 记录数量 | 架构特征 | 技术选型 | 关键挑战 |
|---|---|---|---|---|
| 百万级 | < 10M | 单体架构 | MySQL单实例 | 索引优化 |
| 千万级 | 10M-100M | 主从架构 | MySQL主从+缓存 | 查询性能 |
| 亿级 | 100M-1B | 分库分表 | 分布式中间件 | 数据分布 |
| 十亿级 | 1B-10B | 分布式 | NewSQL数据库 | 一致性保障 |
| 百亿级+ | > 10B | 全球分布 | 云原生数据库 | 跨域同步 |
亿级库表规模架构设计
架构设计原则
原则1:数据分片策略
// Sharding manager: resolves the target shard of a table row from its sharding key.
@Component
public class ShardingManager {
    private static final Logger log = LoggerFactory.getLogger(ShardingManager.class);

    // Sharding configuration (per-table rules and the algorithm type).
    private final ShardingConfiguration config;
    // Shard routing table.
    private final Map<String, ShardInfo> shardRoutingTable;
    // Algorithm that turns a sharding key into a shard value.
    private final ShardingAlgorithm shardingAlgorithm;
    // Metadata manager.
    private final MetadataManager metadataManager;

    /**
     * Creates a manager whose algorithm is selected from the configured algorithm type.
     *
     * @param config sharding configuration
     */
    public ShardingManager(ShardingConfiguration config) {
        this.config = config;
        this.shardRoutingTable = new ConcurrentHashMap<>();
        this.shardingAlgorithm = createShardingAlgorithm(config.getAlgorithmType());
        this.metadataManager = new MetadataManager();
    }

    /**
     * Computes the shard for a row.
     *
     * @param tableName   logical table name used to look up the sharding rule
     * @param shardingKey value of the sharding column
     * @return the shard resolved for the computed shard value
     * @throws ShardingException if no rule exists for the table or any step fails
     */
    public ShardInfo calculateShard(String tableName, Object shardingKey) {
        try {
            // 1. Look up the sharding rule of the table.
            ShardingRule rule = config.getShardingRule(tableName);
            if (rule == null) {
                throw new ShardingException("No sharding rule found for table: " + tableName);
            }
            // 2. Compute the shard value.
            String shardValue = shardingAlgorithm.calculate(shardingKey, rule);
            // 3. Resolve the target shard.
            ShardInfo shardInfo = findTargetShard(tableName, shardValue);
            log.debug("分片计算完成: table={}, key={}, shard={}",
                    tableName, shardingKey, shardInfo.getShardName());
            return shardInfo;
        } catch (Exception e) {
            log.error("分片计算失败: table={}, key={}", tableName, shardingKey, e);
            throw new ShardingException("Failed to calculate shard", e);
        }
    }

    /**
     * Range sharding: binary-searches the rule's range list for the range containing the key.
     */
    public class RangeShardingAlgorithm implements ShardingAlgorithm {
        /**
         * @throws IllegalArgumentException if the key is not a {@link Number}
         * @throws ShardingException        if no range contains the key
         */
        @Override
        public String calculate(Object shardingKey, ShardingRule rule) {
            if (!(shardingKey instanceof Number)) {
                throw new IllegalArgumentException("Range sharding requires numeric key");
            }
            long keyValue = ((Number) shardingKey).longValue();
            List<RangeShard> rangeShards = rule.getRangeShards();
            // The binary search below only works if the ranges are sorted by start value
            // and do not overlap — presumably guaranteed by the rule; verify.
            int low = 0;
            int high = rangeShards.size() - 1;
            while (low <= high) {
                // Unsigned shift avoids int overflow of (low + high).
                int mid = (low + high) >>> 1;
                RangeShard shard = rangeShards.get(mid);
                if (keyValue < shard.getStartValue()) {
                    high = mid - 1;
                } else if (keyValue > shard.getEndValue()) {
                    low = mid + 1;
                } else {
                    return shard.getShardName();
                }
            }
            throw new ShardingException("No suitable shard found for key: " + keyValue);
        }
    }

    /**
     * Hash sharding: shard index = |hash(key.toString())| mod shardCount.
     */
    public class HashShardingAlgorithm implements ShardingAlgorithm {
        /**
         * @throws IllegalArgumentException if the key is null or the shard count is not positive
         */
        @Override
        public String calculate(Object shardingKey, ShardingRule rule) {
            if (shardingKey == null) {
                throw new IllegalArgumentException("Sharding key must not be null");
            }
            int shardCount = rule.getShardCount();
            if (shardCount <= 0) {
                throw new IllegalArgumentException("Shard count must be positive: " + shardCount);
            }
            int hashCode = shardingKey.toString().hashCode();
            // Math.abs(int) returns Integer.MIN_VALUE unchanged, which would produce a
            // negative index. Taking the absolute value as a long is non-negative for every
            // input and yields the same index as before for all other hash codes.
            int shardIndex = (int) (Math.abs((long) hashCode) % shardCount);
            return rule.getShardPrefix() + shardIndex;
        }
    }

    /**
     * Consistent-hash sharding over a ring of shard names.
     *
     * NOTE(review): the ring is a plain TreeMap, which is not safe for concurrent
     * modification; calling addShard while calculate runs on another thread needs
     * external synchronization — confirm how shards are registered.
     */
    public class ConsistentHashShardingAlgorithm implements ShardingAlgorithm {
        private final SortedMap<Integer, String> hashRing;

        public ConsistentHashShardingAlgorithm() {
            this.hashRing = new TreeMap<>();
        }

        /**
         * Returns the first shard at or after the key's position, wrapping to the ring start.
         *
         * @throws ShardingException if no shard has been registered
         */
        @Override
        public String calculate(Object shardingKey, ShardingRule rule) {
            if (hashRing.isEmpty()) {
                // firstKey() on an empty map would throw a bare NoSuchElementException.
                throw new ShardingException("No shards registered on the hash ring");
            }
            int hash = getHash(shardingKey.toString());
            SortedMap<Integer, String> tailMap = hashRing.tailMap(hash);
            if (tailMap.isEmpty()) {
                // Past the last node: wrap around to the first node of the ring.
                return hashRing.get(hashRing.firstKey());
            }
            return tailMap.get(tailMap.firstKey());
        }

        // Non-negative ring position; Integer.MIN_VALUE has no positive int counterpart, map it to 0.
        private int getHash(String key) {
            int h = key.hashCode();
            return h == Integer.MIN_VALUE ? 0 : Math.abs(h);
        }

        /** Registers a shard on the ring at the position given by the hash of its name. */
        public void addShard(String shardName) {
            int hash = getHash(shardName);
            hashRing.put(hash, shardName);
        }
    }
}
原则2:读写分离架构
// Read/write splitting router: writes go to the master, reads go to a replica unless a rule forces the master.
@Component
public class ReadWriteSplittingRouter {
    private static final Logger log = LoggerFactory.getLogger(ReadWriteSplittingRouter.class);

    // Master data source.
    private final DataSource masterDataSource;
    // Replica data sources.
    private final List<DataSource> slaveDataSources;
    // Load balancer used to pick a replica.
    private final LoadBalancer loadBalancer;
    // Read/write splitting strategy.
    private final ReadWriteSplittingStrategy strategy;

    /**
     * @param masterDataSource data source for writes and master-only reads
     * @param slaveDataSources replica data sources; may be empty, in which case reads use the master
     * @param strategy         rules deciding when a read must use the master
     */
    public ReadWriteSplittingRouter(DataSource masterDataSource,
                                    List<DataSource> slaveDataSources,
                                    ReadWriteSplittingStrategy strategy) {
        this.masterDataSource = masterDataSource;
        this.slaveDataSources = slaveDataSources;
        this.strategy = strategy;
        this.loadBalancer = new RoundRobinLoadBalancer();
    }

    /**
     * Selects the data source for a statement.
     *
     * @param statementType kind of SQL statement
     * @param sql           statement text (used for logging and strategy checks)
     * @param parameters    statement parameters passed to the strategy
     * @return the master for INSERT/UPDATE/DELETE, otherwise the result of read routing
     * @throws RoutingException if the statement type is unsupported or routing fails
     */
    public DataSource getDataSource(SQLStatementType statementType,
                                    String sql,
                                    Object[] parameters) {
        try {
            switch (statementType) {
                case INSERT:
                case UPDATE:
                case DELETE:
                    // Writes always go to the master.
                    log.debug("写操作路由到主库: {}", sql);
                    return masterDataSource;
                case SELECT:
                    // Reads are routed by strategy.
                    return routeReadOperation(sql, parameters);
                default:
                    throw new UnsupportedOperationException("Unsupported statement type: " + statementType);
            }
        } catch (Exception e) {
            log.error("数据源路由失败: type={}, sql={}", statementType, sql, e);
            throw new RoutingException("Failed to route data source", e);
        }
    }

    /**
     * Routes a read: master when the strategy, an active transaction or a consistency
     * requirement demands it, or when no replica is configured; otherwise a replica.
     */
    private DataSource routeReadOperation(String sql, Object[] parameters) {
        // 1. Strategy explicitly requires the master.
        if (strategy.requireMaster(sql, parameters)) {
            log.debug("强制路由到主库: {}", sql);
            return masterDataSource;
        }
        // 2. Inside a transaction, stay on the master.
        if (TransactionContext.isInTransaction()) {
            log.debug("事务中,路由到主库: {}", sql);
            return masterDataSource;
        }
        // 3. Strong consistency requested.
        if (strategy.requireStrongConsistency(sql, parameters)) {
            log.debug("强一致性要求,路由到主库: {}", sql);
            return masterDataSource;
        }
        // 4. Without any replica the balancer would throw; serve the read from the master instead.
        if (slaveDataSources == null || slaveDataSources.isEmpty()) {
            log.debug("未配置从库,读操作路由到主库: {}", sql);
            return masterDataSource;
        }
        // 5. Pick a replica.
        DataSource slaveDataSource = loadBalancer.select(slaveDataSources);
        log.debug("读操作路由到从库: {}", sql);
        return slaveDataSource;
    }

    /**
     * Periodic replication-lag check (every 30 seconds). Replicas whose lag exceeds the
     * strategy's limit, or whose status cannot be read, are marked unavailable.
     */
    @Scheduled(fixedRate = 30000)
    public void monitorReplicationLag() {
        try {
            if (slaveDataSources == null || slaveDataSources.isEmpty()) {
                return;
            }
            // Current position of the master.
            MasterStatus masterStatus = getMasterStatus();
            for (int i = 0; i < slaveDataSources.size(); i++) {
                try {
                    DataSource slave = slaveDataSources.get(i);
                    SlaveStatus slaveStatus = getSlaveStatus(slave);
                    long lag = calculateReplicationLag(masterStatus, slaveStatus);
                    log.info("从库{}延迟: {}秒", i, lag);
                    if (lag > strategy.getMaxAcceptableLag()) {
                        loadBalancer.markUnavailable(i);
                        log.warn("从库{}延迟过大,标记为不可用: {}秒", i, lag);
                    } else {
                        loadBalancer.markAvailable(i);
                    }
                } catch (Exception e) {
                    // One broken replica must not stop the checks of the remaining ones;
                    // a replica whose lag is unknown is taken out of rotation.
                    loadBalancer.markUnavailable(i);
                    log.warn("从库{}状态检查失败,标记为不可用", i, e);
                }
            }
        } catch (Exception e) {
            log.error("主从延迟监控失败", e);
        }
    }
}
// Load balancer: chooses one data source from a list and tracks per-index availability.
public interface LoadBalancer {
    /**
     * Marks the data source at the given index as available again.
     *
     * @param index index of the data source
     */
    void markAvailable(int index);

    /**
     * Marks the data source at the given index as unavailable.
     *
     * @param index index of the data source
     */
    void markUnavailable(int index);

    /**
     * Chooses a data source from the candidates.
     *
     * @param dataSources candidate data sources
     * @return the chosen data source
     */
    DataSource select(List<DataSource> dataSources);
}
// Round-robin load balancer that skips indexes marked unavailable.
public class RoundRobinLoadBalancer implements LoadBalancer {
    // Monotonic request counter; it may wrap around to negative values.
    private final AtomicInteger counter = new AtomicInteger(0);
    // Indexes (positions in the list passed to select) currently excluded from rotation.
    private final Set<Integer> unavailableIndexes = ConcurrentHashMap.newKeySet();

    /**
     * Picks the next available data source in round-robin order.
     *
     * @param dataSources candidate data sources
     * @return the next available candidate, or the first candidate when all are marked unavailable
     * @throws IllegalStateException if the list is null or empty
     */
    @Override
    public DataSource select(List<DataSource> dataSources) {
        if (dataSources == null || dataSources.isEmpty()) {
            throw new IllegalStateException("No available data sources");
        }
        // Keep only the candidates that are not marked unavailable.
        List<DataSource> availableSources = new ArrayList<>(dataSources.size());
        for (int i = 0; i < dataSources.size(); i++) {
            if (!unavailableIndexes.contains(i)) {
                availableSources.add(dataSources.get(i));
            }
        }
        if (availableSources.isEmpty()) {
            // Nothing is available: fall back to the first data source.
            return dataSources.get(0);
        }
        // floorMod keeps the index non-negative after the counter overflows.
        int index = Math.floorMod(counter.getAndIncrement(), availableSources.size());
        return availableSources.get(index);
    }

    @Override
    public void markUnavailable(int index) {
        unavailableIndexes.add(index);
    }

    @Override
    public void markAvailable(int index) {
        unavailableIndexes.remove(index);
    }
}
原则3:缓存层设计
// Multi-level cache: local in-memory cache (L1) in front of Redis (L2).
@Component
public class MultiLevelCacheManager {
    private static final Logger log = LoggerFactory.getLogger(MultiLevelCacheManager.class);

    // L1: local in-memory cache.
    private final Cache<String, Object> localCache;
    // L2: Redis distributed cache.
    private final RedisTemplate<String, Object> redisTemplate;
    // L3: database query cache (built in the constructor; not accessed by the methods shown here).
    private final Cache<String, Object> dbQueryCache;
    // Cache configuration (sizes and TTLs).
    private final CacheConfiguration config;

    /**
     * Builds both local caches from the configured maximum sizes and TTLs.
     */
    public MultiLevelCacheManager(CacheConfiguration config,
                                  RedisTemplate<String, Object> redisTemplate) {
        this.config = config;
        this.redisTemplate = redisTemplate;
        this.localCache = Caffeine.newBuilder()
                .maximumSize(config.getLocalCacheSize())
                .expireAfterWrite(config.getLocalCacheTtl())
                .recordStats()
                .build();
        this.dbQueryCache = Caffeine.newBuilder()
                .maximumSize(config.getDbQueryCacheSize())
                .expireAfterWrite(config.getDbQueryCacheTtl())
                .recordStats()
                .build();
    }

    /**
     * Looks the key up in L1, then L2, and finally falls back to the loader.
     *
     * The loader is invoked at most once per call: a cache failure is logged and treated
     * as a miss, while an exception thrown by the loader propagates to the caller.
     *
     * @param key    cache key
     * @param type   expected value type
     * @param loader source of truth used on a cache miss
     * @return the cached or loaded value; null when the loader returns null
     */
    public <T> T get(String key, Class<T> type, Supplier<T> loader) {
        T cached = lookupCached(key, type);
        if (cached != null) {
            return cached;
        }
        // Load outside the cache try/catch so a loader failure is not retried.
        T value = loader.get();
        if (value != null) {
            put(key, value);
        }
        return value;
    }

    // Reads L1 then L2 (back-filling L1 on an L2 hit); any cache error is logged and reported as a miss.
    private <T> T lookupCached(String key, Class<T> type) {
        try {
            Object localValue = localCache.getIfPresent(key);
            if (localValue != null) {
                log.debug("L1缓存命中: {}", key);
                return type.cast(localValue);
            }
            Object redisValue = redisTemplate.opsForValue().get(key);
            if (redisValue != null) {
                log.debug("L2缓存命中: {}", key);
                localCache.put(key, redisValue);
                return type.cast(redisValue);
            }
        } catch (Exception e) {
            log.error("缓存查询失败: key={}", key, e);
        }
        return null;
    }

    /**
     * Writes the value to L1 and L2. Failures are logged and not propagated.
     */
    public void put(String key, Object value) {
        try {
            localCache.put(key, value);
            redisTemplate.opsForValue().set(key, value, config.getRedisTtl());
            log.debug("缓存写入完成: {}", key);
        } catch (Exception e) {
            log.error("缓存写入失败: key={}", key, e);
        }
    }

    /**
     * Removes the key from L1 and L2. Failures are logged and not propagated.
     */
    public void evict(String key) {
        try {
            localCache.invalidate(key);
            redisTemplate.delete(key);
            log.debug("缓存失效完成: {}", key);
        } catch (Exception e) {
            log.error("缓存失效失败: key={}", key, e);
        }
    }

    /**
     * Removes all given keys from L1 and L2. Failures are logged and not propagated.
     */
    public void evictBatch(Collection<String> keys) {
        try {
            localCache.invalidateAll(keys);
            redisTemplate.delete(keys);
            log.debug("批量缓存失效完成: {}个键", keys.size());
        } catch (Exception e) {
            log.error("批量缓存失效失败", e);
        }
    }

    /**
     * Cache warm-up, scheduled daily at 06:00: loads every hot key and stores it in the
     * cache. A failure on one key is logged and does not stop the others.
     */
    @Scheduled(cron = "0 0 6 * * *")
    public void preheatCache() {
        try {
            log.info("开始缓存预热");
            List<String> hotKeys = getHotDataKeys();
            for (String key : hotKeys) {
                try {
                    Object value = loadData(key);
                    if (value != null) {
                        put(key, value);
                    }
                } catch (Exception e) {
                    log.error("缓存预热失败: key={}", key, e);
                }
            }
            log.info("缓存预热完成: {}个键", hotKeys.size());
        } catch (Exception e) {
            log.error("缓存预热任务失败", e);
        }
    }

    /**
     * Returns L1 hit/miss statistics, the estimated L1 size and the Redis key count.
     */
    public CacheStatistics getStatistics() {
        CacheStats localStats = localCache.stats();
        return CacheStatistics.builder()
                .localCacheHitCount(localStats.hitCount())
                .localCacheMissCount(localStats.missCount())
                .localCacheHitRate(localStats.hitRate())
                .localCacheSize(localCache.estimatedSize())
                .redisKeyCount(getRedisKeyCount())
                .build();
    }
}
亿级架构实践案例
案例1:电商订单系统
// 电商订单系统架构实现
@Service
@Slf4j
public class EcommerceOrderSystem {
@Autowired
private ShardingManager shardingManager;
@Autowired
private ReadWriteSplittingRouter readWriteRouter;
@Autowired
private MultiLevelCacheManager cacheManager;
@Autowired
private OrderRepository orderRepository;
/**
* 创建订单
*/
@Transactional
public Order createOrder(CreateOrderRequest request) {
try {
// 1. 生成分布式ID
String orderId = generateDistributedId();
// 2. 计算分片
ShardInfo shardInfo = shardingManager.calculateShard("orders", orderId);
log.info("订单分片信息: orderId={}, shard={}", orderId, shardInfo.getShardName());
// 3. 构建订单对象
Order order = Order.builder()
.orderId(orderId)
.userId(request.getUserId())
.productItems(request.getItems())
.totalAmount(calculateTotalAmount(request.getItems()))
.shippingAddress(request.getShippingAddress())
.orderStatus(OrderStatus.PENDING)
.createdAt(LocalDateTime.now())
.updatedAt(LocalDateTime.now())
.build();
// 4. 写入数据库(路由到主库)
DataSource dataSource = readWriteRouter.getDataSource(SQLStatementType.INSERT,
"INSERT INTO orders ...",
new Object[]{orderId});
orderRepository.save(order, shardInfo);
// 5. 缓存订单信息
cacheManager.put("order:" + orderId, order);
// 6. 异步处理后续流程
publishOrderCreatedEvent(order);
log.info("订单创建成功: orderId={}, userId={}", orderId, request.getUserId());
return order;
} catch (Exception e) {
log.error("订单创建失败: userId={}", request.getUserId(), e);
throw new OrderCreationException("Failed to create order", e);
}
}
/**
* 查询订单
*/
public Order getOrder(String orderId) {
try {
// 1. 先查缓存
Order cachedOrder = cacheManager.get("order:" + orderId, Order.class, () -> null);
if (cachedOrder != null) {
log.debug("订单缓存命中: orderId={}", orderId);
return cachedOrder;
}
// 2. 计算分片
ShardInfo shardInfo = shardingManager.calculateShard("orders", orderId);
// 3. 查询数据库(路由到从库)
DataSource dataSource = readWriteRouter.getDataSource(SQLStatementType.SELECT,
"SELECT * FROM orders WHERE order_id = ?",
new Object[]{orderId});
Order order = orderRepository.findById(orderId, shardInfo);
// 4. 写入缓存
if (order != null) {
cacheManager.put("order:" + orderId, order);
}
return order;
} catch (Exception e) {
log.error("订单查询失败: orderId={}", orderId, e);
throw new OrderQueryException("Failed to get order", e);
}
}
/**
* 分页查询用户订单
*/
public PageResult<Order> getUserOrders(String userId, int page, int size) {
try {
// 1. 构建缓存键
String cacheKey = String.format("user:orders:%s:%d:%d", userId, page, size);
// 2. 查询缓存
PageResult<Order> cachedResult = cacheManager.get(cacheKey, PageResult.class, () -> null);
if (cachedResult != null) {
log.debug("用户订单缓存命中: userId={}, page={}", userId, page);
return cachedResult;
}
// 3. 查询多个分片
List<Order> orders = new ArrayList<>();
int totalCount = 0;
// 获取用户相关的所有分片
List<ShardInfo> userShards = shardingManager.getUserShards(userId);
for (ShardInfo shard : userShards) {
// 查询每个分片
PageResult<Order> shardResult = orderRepository.findByUserId(userId, page, size, shard);
orders.addAll(shardResult.getContent());
totalCount += shardResult.getTotalElements();
}
// 4. 合并和排序结果
orders.sort((a, b) -> b.getCreatedAt().compareTo(a.getCreatedAt()));
// 5. 构建分页结果
PageResult<Order> result = PageResult.<Order>builder()
.content(orders)
.totalElements(totalCount)
.totalPages((totalCount + size - 1) / size)
.currentPage(page)
.pageSize(size)
.build();
// 6. 缓存结果
cacheManager.put(cacheKey, result);
return result;
} catch (Exception e) {
log.error("用户订单查询失败: userId={}, page={}", userId, page, e);
throw new OrderQueryException("Failed to get user orders", e);
}
}
/**
* 订单统计查询
*/
public OrderStatistics getOrderStatistics(String userId, LocalDate startDate, LocalDate endDate) {
try {
// 1. 构建缓存键
String cacheKey = String.format("order:stats:%s:%s:%s", userId, startDate, endDate);
// 2. 查询缓存
OrderStatistics cachedStats = cacheManager.get(cacheKey, OrderStatistics.class, () -> null);
if (cachedStats != null) {
log.debug("订单统计缓存命中: userId={}, dateRange={}-{}", userId, startDate, endDate);
return cachedStats;
}
// 3. 查询多个分片并聚合统计
OrderStatistics stats = calculateOrderStatistics(userId, startDate, endDate);
// 4. 缓存结果
cacheManager.put(cacheKey, stats);
return stats;
} catch (Exception e) {
log.error("订单统计查询失败: userId={}, dateRange={}-{}", userId, startDate, endDate, e);
throw new OrderQueryException("Failed to get order statistics", e);
}
}
/**
* 性能测试
*/
public void performanceTest() {
log.info("=== 电商订单系统性能测试 ===");
// 测试不同规模下的性能表现
int[] dataSizes = {10000, 100000, 1000000};
for (int size : dataSizes) {
log.info("测试数据规模: {}", size);
// 写入性能测试
long startTime = System.currentTimeMillis();
for (int i = 0; i < size; i++) {
CreateOrderRequest request = CreateOrderRequest.builder()
.userId("user_" + (i % 10000))
.items(generateOrderItems())
.build();
createOrder(request);
}
long writeTime = System.currentTimeMillis() - startTime;
// 查询性能测试
startTime = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
String orderId = "order_" + (i % size);
getOrder(orderId);
}
long readTime = System.currentTimeMillis() - startTime;
log.info("数据规模: {}, 写入时间: {}ms, 查询时间: {}ms, 平均写入: {}μs, 平均查询: {}ms",
size, writeTime, readTime,
(writeTime * 1000) / size, (double) readTime / 1000);
}
}
}
百亿级库表架构设计
超大规模架构挑战
挑战1:数据一致性保障
// Distributed transaction manager providing a two-phase-commit flow and a Saga flow.
@Component
public class DistributedTransactionManager {
    private static final Logger log = LoggerFactory.getLogger(DistributedTransactionManager.class);

    // Transaction coordinator.
    private final TransactionCoordinator coordinator;
    // Participant manager.
    private final ParticipantManager participantManager;
    // Transaction log store.
    private final TransactionLogStore logStore;
    // Handler that records failed compensations.
    private final CompensationHandler compensationHandler;
    // NOTE(review): the phase methods below read `config` (prepare/commit/rollback timeouts),
    // but no such field is declared in the class as shown — confirm where it is defined.

    public DistributedTransactionManager(TransactionCoordinator coordinator,
                                         ParticipantManager participantManager,
                                         TransactionLogStore logStore) {
        this.coordinator = coordinator;
        this.participantManager = participantManager;
        this.logStore = logStore;
        this.compensationHandler = new CompensationHandler();
    }

    /**
     * Runs a distributed transaction: prepare all participants, roll back if any of them
     * fails to prepare, otherwise commit and then run the transaction's business logic.
     *
     * NOTE(review): commitPhase handles commit timeouts/failures internally and returns
     * normally, so this method continues after a failed commit unless the handlers throw — confirm.
     *
     * @return the result of the transaction's business logic
     * @throws DistributedTransactionException if preparation or any later step fails
     */
    public <T> T executeDistributedTransaction(DistributedTransaction<T> transaction) {
        String transactionId = generateTransactionId();
        log.info("开始分布式事务: {}", transactionId);
        try {
            // 1. Prepare phase.
            List<TransactionParticipant> participants = transaction.getParticipants();
            boolean allPrepared = preparePhase(transactionId, participants);
            if (!allPrepared) {
                // Preparation failed: roll back.
                rollbackPhase(transactionId, participants);
                throw new DistributedTransactionException("Transaction preparation failed");
            }
            // 2. Commit phase.
            commitPhase(transactionId, participants);
            // 3. Business logic.
            T result = transaction.execute();
            log.info("分布式事务执行成功: {}", transactionId);
            return result;
        } catch (Exception e) {
            log.error("分布式事务执行失败: {}", transactionId, e);
            throw new DistributedTransactionException("Transaction execution failed", e);
        }
    }

    /**
     * Prepare phase: asks every participant to prepare, in parallel.
     *
     * @return true only if every participant reported success within the prepare timeout
     */
    private boolean preparePhase(String transactionId, List<TransactionParticipant> participants) {
        log.info("事务准备阶段开始: {}", transactionId);
        List<CompletableFuture<Boolean>> prepareFutures = new ArrayList<>();
        for (TransactionParticipant participant : participants) {
            CompletableFuture<Boolean> future = CompletableFuture.supplyAsync(() -> {
                try {
                    // Write the prepare record before asking the participant.
                    logStore.logPrepare(transactionId, participant);
                    boolean prepared = participant.prepare(transactionId);
                    if (prepared) {
                        log.debug("参与者准备成功: transactionId={}, participant={}",
                                transactionId, participant.getName());
                    } else {
                        log.warn("参与者准备失败: transactionId={}, participant={}",
                                transactionId, participant.getName());
                    }
                    return prepared;
                } catch (Exception e) {
                    log.error("参与者准备异常: transactionId={}, participant={}",
                            transactionId, participant.getName(), e);
                    return false;
                }
            });
            prepareFutures.add(future);
        }
        // Wait for all participants.
        CompletableFuture<Void> allFutures = CompletableFuture.allOf(
                prepareFutures.toArray(new CompletableFuture[0]));
        try {
            allFutures.get(config.getPrepareTimeout(), TimeUnit.SECONDS);
            boolean allPrepared = prepareFutures.stream()
                    .allMatch(future -> {
                        try {
                            return future.get();
                        } catch (Exception e) {
                            return false;
                        }
                    });
            log.info("事务准备阶段完成: transactionId={}, allPrepared={}", transactionId, allPrepared);
            return allPrepared;
        } catch (TimeoutException e) {
            log.error("事务准备超时: transactionId={}", transactionId, e);
            return false;
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread instead of swallowing it.
            Thread.currentThread().interrupt();
            log.error("事务准备异常: transactionId={}", transactionId, e);
            return false;
        } catch (Exception e) {
            log.error("事务准备异常: transactionId={}", transactionId, e);
            return false;
        }
    }

    /**
     * Commit phase: asks every participant to commit, in parallel, and waits up to the
     * commit timeout. Timeouts and failures are delegated to the commit handlers.
     */
    private void commitPhase(String transactionId, List<TransactionParticipant> participants) {
        log.info("事务提交阶段开始: {}", transactionId);
        List<CompletableFuture<Void>> commitFutures = new ArrayList<>();
        for (TransactionParticipant participant : participants) {
            CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
                try {
                    // Write the commit record before asking the participant.
                    logStore.logCommit(transactionId, participant);
                    participant.commit(transactionId);
                    log.debug("参与者提交成功: transactionId={}, participant={}",
                            transactionId, participant.getName());
                } catch (Exception e) {
                    log.error("参与者提交异常: transactionId={}, participant={}",
                            transactionId, participant.getName(), e);
                    // Fail the future so the wait below sees the failure.
                    throw new RuntimeException("Commit failed for participant: " + participant.getName(), e);
                }
            });
            commitFutures.add(future);
        }
        // Wait for all participants.
        try {
            CompletableFuture.allOf(commitFutures.toArray(new CompletableFuture[0]))
                    .get(config.getCommitTimeout(), TimeUnit.SECONDS);
            log.info("事务提交阶段完成: transactionId={}", transactionId);
        } catch (TimeoutException e) {
            log.error("事务提交超时: transactionId={}", transactionId, e);
            handleCommitTimeout(transactionId, participants);
        } catch (InterruptedException e) {
            // Restore the interrupt flag, then treat it like any other commit failure.
            Thread.currentThread().interrupt();
            log.error("事务提交异常: transactionId={}", transactionId, e);
            handleCommitFailure(transactionId, participants);
        } catch (Exception e) {
            log.error("事务提交异常: transactionId={}", transactionId, e);
            handleCommitFailure(transactionId, participants);
        }
    }

    /**
     * Rollback phase: asks every participant to roll back, in parallel. Failures are
     * logged and not propagated.
     */
    private void rollbackPhase(String transactionId, List<TransactionParticipant> participants) {
        log.info("事务回滚阶段开始: transactionId={}", transactionId);
        List<CompletableFuture<Void>> rollbackFutures = new ArrayList<>();
        for (TransactionParticipant participant : participants) {
            CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
                try {
                    // Write the rollback record before asking the participant.
                    logStore.logRollback(transactionId, participant);
                    participant.rollback(transactionId);
                    log.debug("参与者回滚成功: transactionId={}, participant={}",
                            transactionId, participant.getName());
                } catch (Exception e) {
                    log.error("参与者回滚异常: transactionId={}, participant={}",
                            transactionId, participant.getName(), e);
                }
            });
            rollbackFutures.add(future);
        }
        // Wait for all participants.
        try {
            CompletableFuture.allOf(rollbackFutures.toArray(new CompletableFuture[0]))
                    .get(config.getRollbackTimeout(), TimeUnit.SECONDS);
            log.info("事务回滚阶段完成: transactionId={}", transactionId);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("事务回滚异常: transactionId={}", transactionId, e);
        } catch (Exception e) {
            log.error("事务回滚异常: transactionId={}", transactionId, e);
        }
    }

    /**
     * Saga flow: runs the steps in order, then the saga's business logic. On any failure
     * the steps that completed are compensated in reverse order; a failed compensation
     * is logged and recorded via the compensation handler.
     *
     * @return the result of the saga's business logic
     * @throws SagaExecutionException if a step or the business logic fails
     */
    public <T> T executeSaga(SagaTransaction<T> saga) {
        String sagaId = generateSagaId();
        log.info("开始Saga事务: {}", sagaId);
        List<SagaStep> steps = saga.getSteps();
        List<SagaStep> completedSteps = new ArrayList<>();
        try {
            // Forward execution.
            for (SagaStep step : steps) {
                log.info("执行Saga步骤: sagaId={}, step={}", sagaId, step.getName());
                step.execute();
                // Only steps that finished are eligible for compensation.
                completedSteps.add(step);
                log.info("Saga步骤执行成功: sagaId={}, step={}", sagaId, step.getName());
            }
            // Business logic.
            T result = saga.execute();
            log.info("Saga事务执行成功: {}", sagaId);
            return result;
        } catch (Exception e) {
            log.error("Saga事务执行失败: {}, 开始补偿", sagaId, e);
            // Compensate in reverse order.
            for (int i = completedSteps.size() - 1; i >= 0; i--) {
                SagaStep step = completedSteps.get(i);
                try {
                    log.info("执行Saga补偿步骤: sagaId={}, step={}", sagaId, step.getName());
                    step.compensate();
                    log.info("Saga补偿步骤执行成功: sagaId={}, step={}", sagaId, step.getName());
                } catch (Exception compensateException) {
                    log.error("Saga补偿步骤执行失败: sagaId={}, step={}", sagaId, step.getName(),
                            compensateException);
                    // Record the failed compensation for manual follow-up.
                    compensationHandler.logCompensationFailure(sagaId, step, compensateException);
                }
            }
            throw new SagaExecutionException("Saga execution failed and compensation completed", e);
        }
    }
}
挑战2:跨地域数据同步
// Global data synchronization manager: publishes and applies cross-region sync messages.
@Component
public class GlobalDataSynchronizationManager {
    private static final Logger log = LoggerFactory.getLogger(GlobalDataSynchronizationManager.class);

    // Region configuration.
    private final Map<String, RegionConfig> regions;
    // Synchronization strategy (supplies the conflict strategy).
    private final SynchronizationStrategy syncStrategy;
    // Conflict resolver.
    private final ConflictResolver conflictResolver;
    // Sync status monitor.
    private final SyncStatusMonitor syncMonitor;
    // Message queue.
    private final MessageQueue messageQueue;

    public GlobalDataSynchronizationManager(Map<String, RegionConfig> regions,
                                            SynchronizationStrategy syncStrategy) {
        this.regions = regions;
        this.syncStrategy = syncStrategy;
        this.conflictResolver = new ConflictResolver();
        this.syncMonitor = new SyncStatusMonitor();
        this.messageQueue = new KafkaMessageQueue();
    }

    /**
     * Publishes a sync message describing an operation on one record.
     *
     * @throws DataSynchronizationException if the message cannot be built or published
     */
    public void synchronizeData(String tableName, String recordId, SyncOperation operation) {
        try {
            // 1. Generate the sync id.
            String syncId = generateSyncId();
            // 2. Build the message, stamped with the current instant and region.
            SyncMessage message = SyncMessage.builder()
                    .syncId(syncId)
                    .tableName(tableName)
                    .recordId(recordId)
                    .operation(operation)
                    .timestamp(Instant.now())
                    .sourceRegion(getCurrentRegion())
                    .build();
            // 3. Publish it.
            publishSyncMessage(message);
            log.info("数据同步消息发布成功: syncId={}, table={}, record={}",
                    syncId, tableName, recordId);
        } catch (Exception e) {
            log.error("数据同步失败: table={}, record={}", tableName, recordId, e);
            throw new DataSynchronizationException("Failed to synchronize data", e);
        }
    }

    /**
     * Applies an incoming sync message unless it originated in the current region.
     * When the local record was modified after the message timestamp, the conflict
     * resolver decides whether to skip, merge or apply the message. Failures are
     * recorded and the message is handed to the retry mechanism.
     *
     * NOTE(review): only the skip and merge outcomes are handled explicitly; any other
     * resolution (including the manual one) falls through to applying the message — confirm.
     */
    @EventListener
    public void handleSyncMessage(SyncMessage message) {
        String targetRegion = getCurrentRegion();
        // Ignore messages produced by this region.
        if (message.getSourceRegion().equals(targetRegion)) {
            return;
        }
        try {
            log.info("处理同步消息: syncId={}, from={}, to={}, table={}, record={}",
                    message.getSyncId(), message.getSourceRegion(), targetRegion,
                    message.getTableName(), message.getRecordId());
            // 1. Load the local version of the record.
            LocalDataVersion localVersion = getLocalDataVersion(
                    message.getTableName(), message.getRecordId());
            // 2. Conflict check. The message timestamp is an Instant, so it is compared
            //    with the local last-modified Instant rather than with a relational operator.
            if (localVersion != null
                    && localVersion.getLastModified().isAfter(message.getTimestamp())) {
                ConflictResolution resolution = conflictResolver.resolve(
                        localVersion, message);
                if (resolution.shouldSkip()) {
                    log.info("跳过同步消息(冲突解决): syncId={}", message.getSyncId());
                    return;
                }
                if (resolution.shouldMerge()) {
                    mergeData(message, localVersion);
                    // A merged message is a successfully handled message as well.
                    syncMonitor.recordSuccess(message);
                    return;
                }
            }
            // 3. Apply the operation.
            applySyncOperation(message);
            // 4. Record the outcome.
            syncMonitor.recordSuccess(message);
            log.info("同步消息处理成功: syncId={}", message.getSyncId());
        } catch (Exception e) {
            log.error("同步消息处理失败: syncId={}", message.getSyncId(), e);
            syncMonitor.recordFailure(message, e);
            // Hand over to the retry mechanism.
            retrySyncMessage(message);
        }
    }

    /**
     * Conflict resolution for bidirectional synchronization.
     */
    public class ConflictResolver {
        /**
         * Compares the local last-modified instant with the message timestamp and
         * dispatches to the matching handler.
         */
        public ConflictResolution resolve(LocalDataVersion localVersion, SyncMessage incomingMessage) {
            int order = localVersion.getLastModified().compareTo(incomingMessage.getTimestamp());
            if (order > 0) {
                // Local copy is newer.
                return handleLocalNewer(localVersion, incomingMessage);
            } else if (order < 0) {
                // Remote copy is newer.
                return handleRemoteNewer(localVersion, incomingMessage);
            } else {
                // Same instant: decide by content.
                return handleSameVersion(localVersion, incomingMessage);
            }
        }

        // Local copy is newer: the configured conflict strategy decides.
        private ConflictResolution handleLocalNewer(LocalDataVersion local, SyncMessage incoming) {
            ConflictResolutionStrategy strategy = syncStrategy.getConflictStrategy();
            switch (strategy) {
                case LAST_WRITE_WINS:
                    // The later write wins.
                    if (incoming.getTimestamp().isAfter(local.getLastModified())) {
                        return ConflictResolution.useRemote();
                    } else {
                        return ConflictResolution.skip();
                    }
                case MERGE_CHANGES:
                    return ConflictResolution.merge();
                case SOURCE_PRIORITY:
                    // The region with the higher priority wins.
                    int sourcePriority = getRegionPriority(incoming.getSourceRegion());
                    int localPriority = getRegionPriority(getCurrentRegion());
                    if (sourcePriority > localPriority) {
                        return ConflictResolution.useRemote();
                    } else {
                        return ConflictResolution.skip();
                    }
                case MANUAL_RESOLUTION:
                    // Record the conflict for manual handling.
                    logConflict(local, incoming);
                    return ConflictResolution.manual();
                default:
                    return ConflictResolution.skip();
            }
        }

        // Remote copy is newer: take it.
        private ConflictResolution handleRemoteNewer(LocalDataVersion local, SyncMessage incoming) {
            return ConflictResolution.useRemote();
        }

        // Same instant: merge when the content hashes differ, otherwise skip.
        private ConflictResolution handleSameVersion(LocalDataVersion local, SyncMessage incoming) {
            String localHash = calculateContentHash(local.getData());
            String remoteHash = incoming.getContentHash();
            if (!localHash.equals(remoteHash)) {
                return ConflictResolution.merge();
            }
            return ConflictResolution.skip();
        }
    }

    /**
     * Asynchronously synchronizes the pending records of a table to a region in batches
     * of 1000. Failures are logged and not propagated.
     */
    @Async
    public void performAsyncSynchronization(String tableName, String region) {
        try {
            log.info("开始异步同步: table={}, region={}", tableName, region);
            // 1. Collect the pending records.
            List<SyncRecord> pendingRecords = getPendingSyncRecords(tableName, region);
            // 2. Process them batch by batch.
            int batchSize = 1000;
            List<List<SyncRecord>> batches = Lists.partition(pendingRecords, batchSize);
            for (List<SyncRecord> batch : batches) {
                processSyncBatch(batch, region);
            }
            log.info("异步同步完成: table={}, region={}, records={}",
                    tableName, region, pendingRecords.size());
        } catch (Exception e) {
            log.error("异步同步失败: table={}, region={}", tableName, region, e);
        }
    }
}
挑战3:实时分析处理
// 实时分析引擎
@Component
public class RealtimeAnalyticsEngine {
private static final Logger log = LoggerFactory.getLogger(RealtimeAnalyticsEngine.class);
// 流处理引擎
private final StreamProcessingEngine streamEngine;
// 内存计算框架
private final InMemoryComputingFramework computingFramework;
// 结果存储
private final AnalyticsResultStore resultStore;
// 查询优化器
private final QueryOptimizer queryOptimizer;
/**
 * Creates the engine from the injected stream engine and computing framework; the
 * result store and the query optimizer are instantiated here.
 */
public RealtimeAnalyticsEngine(StreamProcessingEngine streamEngine,
                               InMemoryComputingFramework computingFramework) {
    // Collaborators created locally.
    this.resultStore = new AnalyticsResultStore();
    this.queryOptimizer = new QueryOptimizer();
    // Collaborators supplied by the caller.
    this.streamEngine = streamEngine;
    this.computingFramework = computingFramework;
}
/**
 * Submits the real-time order analytics job to the stream engine.
 *
 * The job uses the topology from buildAnalyticsTopology(); the orderStream argument is
 * not read by this method.
 *
 * @throws StreamProcessingException if building or submitting the job fails
 */
public void processDataStream(DataStream<OrderEvent> orderStream) {
    try {
        // Topology first, then the job settings, then submission.
        StreamTopology analyticsTopology = buildAnalyticsTopology();
        StreamConfig jobSettings = StreamConfig.builder()
                .parallelism(16)
                .checkpointInterval(Duration.ofSeconds(30))
                .processingGuarantee(ProcessingGuarantee.EXACTLY_ONCE)
                .build();
        StreamJob submitted = streamEngine.submitJob(
                "realtime-order-analytics", analyticsTopology, jobSettings);
        log.info("实时数据流处理任务提交成功: jobId={}", submitted.getJobId());
    } catch (Exception e) {
        log.error("实时数据流处理失败", e);
        throw new StreamProcessingException("Failed to process data stream", e);
    }
}
/**
 * Builds the analytics topology: Kafka order source, data cleaner, real-time aggregator, result sink.
 */
private StreamTopology buildAnalyticsTopology() {
    // Node names, shared between a node's declaration and the node wired after it.
    final String sourceName = "order-source";
    final String cleanerName = "data-cleaner";
    final String aggregatorName = "realtime-aggregator";
    final String sinkName = "result-sink";

    StreamTopologyBuilder topologyBuilder = StreamTopologyBuilder.newBuilder();

    // Source: order events from Kafka.
    topologyBuilder.addSource(sourceName, KafkaSource.<OrderEvent>builder()
            .topic("order-events")
            .consumerGroup("analytics-group")
            .deserializer(new OrderEventDeserializer())
            .build());

    // Cleaner, attached to the source.
    topologyBuilder.addProcessor(cleanerName, new DataCleanProcessor())
            .addSource(sourceName);

    // Aggregator, attached to the cleaner.
    topologyBuilder.addProcessor(aggregatorName, new RealtimeAggregator())
            .addProcessor(cleanerName);

    // Sink, attached to the aggregator.
    topologyBuilder.addSink(sinkName, new AnalyticsResultSink())
            .addProcessor(aggregatorName);

    return topologyBuilder.build();
}
/**
 * Stream processor that aggregates order events per time window and forwards a result
 * once the event's window reports completion.
 */
public class RealtimeAggregator implements StreamProcessor<OrderEvent, AnalyticsResult> {
    // Open time windows, keyed by window key.
    private final Map<String, TimeWindow> timeWindows;
    // Aggregation state, keyed by state key.
    private final StateStore<AggregationState> stateStore;

    public RealtimeAggregator() {
        this.timeWindows = new ConcurrentHashMap<>();
        this.stateStore = new RocksDBStateStore<>("analytics-state");
    }

    /**
     * Folds one event into its aggregation state; when the event's window is complete,
     * forwards the result and removes the state and the window.
     *
     * @throws ProcessingException if any step fails
     */
    @Override
    public void process(OrderEvent event, Context context) {
        try {
            // Locate (or open) the window the event falls into.
            String windowKey = getWindowKey(event.getTimestamp());
            TimeWindow window = timeWindows.computeIfAbsent(windowKey,
                    ignoredKey -> new TimeWindow(getWindowStart(event.getTimestamp()),
                            getWindowEnd(event.getTimestamp())));

            // Load the state for this event, creating it on first use.
            String stateKey = buildStateKey(event);
            AggregationState state = stateStore.get(stateKey);
            if (state == null) {
                state = new AggregationState(window.getStart(), window.getEnd());
            }

            // Fold the event in and persist.
            updateAggregationState(state, event);
            stateStore.put(stateKey, state);

            // Nothing to emit while the window is still open.
            if (!window.isComplete()) {
                return;
            }
            AnalyticsResult result = buildAnalyticsResult(state);
            context.forward(result);
            // Release the finished state and window.
            stateStore.delete(stateKey);
            timeWindows.remove(windowKey);
        } catch (Exception e) {
            log.error("实时聚合处理失败: event={}", event, e);
            throw new ProcessingException("Failed to process event", e);
        }
    }

    // Adds the event's order count, amount, per-product and per-region sales, and timestamp to the state.
    private void updateAggregationState(AggregationState state, OrderEvent event) {
        state.incrementOrderCount();
        state.addOrderAmount(event.getAmount());
        for (OrderItem item : event.getItems()) {
            state.addProductSale(item.getProductId(), item.getQuantity(), item.getAmount());
        }
        state.addRegionSale(event.getRegion(), event.getAmount());
        state.setLastUpdateTime(event.getTimestamp());
    }

    // Converts the state into a result; the average is total amount divided by order count.
    private AnalyticsResult buildAnalyticsResult(AggregationState state) {
        return AnalyticsResult.builder()
                .windowStart(state.getWindowStart())
                .windowEnd(state.getWindowEnd())
                .totalOrders(state.getOrderCount())
                .totalAmount(state.getTotalAmount())
                .productSales(state.getProductSales())
                .regionSales(state.getRegionSales())
                .averageOrderAmount(state.getTotalAmount() / state.getOrderCount())
                .build();
    }
}
/**
 * Runs an in-memory query: optimize it, build the computing task, execute it on the
 * computing framework and merge the results.
 *
 * @return the merged query result
 * @throws ComputingException if any step fails
 */
public <T> T executeInMemoryQuery(InMemoryQuery<T> query) {
    try {
        OptimizedQuery plan = queryOptimizer.optimize(query);
        ComputingTask<T> computingTask = buildComputingTask(plan);
        ComputingResult<T> partialResults = computingFramework.execute(computingTask);
        return mergeResults(partialResults);
    } catch (Exception e) {
        log.error("内存计算查询失败", e);
        throw new ComputingException("Failed to execute in-memory query", e);
    }
}
/**
 * Builds a pre-aggregation job over a fixed set of dimensions and metrics
 * and submits it to the computing framework.
 *
 * <p>Failures are logged and not rethrown, so callers are never interrupted
 * by a failed submission.
 */
public void performPreAggregation() {
    try {
        log.info("开始预聚合优化");

        // Grouping dimensions: hourly time buckets plus three exact-match attributes.
        final List<Dimension> groupBy = Arrays.asList(
                new Dimension("time", Granularity.HOUR),
                new Dimension("region", Granularity.EXACT),
                new Dimension("product_category", Granularity.EXACT),
                new Dimension("customer_segment", Granularity.EXACT));

        // Measures computed for every dimension combination.
        final List<Metric> measures = Arrays.asList(
                new Metric("order_count", AggregationType.COUNT),
                new Metric("total_amount", AggregationType.SUM),
                new Metric("avg_amount", AggregationType.AVG),
                new Metric("unique_customers", AggregationType.DISTINCT_COUNT));

        // Assemble the job and hand it to the framework.
        final PreAggregationJob preAggregation = PreAggregationJob.builder()
                .dimensions(groupBy)
                .metrics(measures)
                .timeRange(getPreAggregationTimeRange())
                .build();
        computingFramework.submitPreAggregationJob(preAggregation);

        log.info("预聚合优化完成");
    } catch (Exception e) {
        log.error("预聚合优化失败", e);
    }
}
/**
* 实时查询API
*/
@RestController
@RequestMapping("/api/analytics")
public class AnalyticsController {
@Autowired
private RealtimeAnalyticsEngine analyticsEngine;
/**
 * Returns real-time sales statistics for a time range, optionally narrowed
 * by region and product category.
 *
 * @param timeRange       textual time range, parsed by {@code parseTimeRange}
 * @param region          optional region filter
 * @param productCategory optional product-category filter
 * @return a success response carrying the statistics, or an error response
 *         if building or running the query fails
 */
@GetMapping("/sales/realtime")
public ApiResponse<RealtimeSalesStats> getRealtimeSalesStats(
        @RequestParam String timeRange,
        @RequestParam(required = false) String region,
        @RequestParam(required = false) String productCategory) {
    try {
        final InMemoryQuery<RealtimeSalesStats> salesQuery =
                InMemoryQuery.<RealtimeSalesStats>builder()
                        .queryType(QueryType.REALTIME_SALES)
                        .timeRange(parseTimeRange(timeRange))
                        .filters(buildFilters(region, productCategory))
                        .build();

        final RealtimeSalesStats salesStats = analyticsEngine.executeInMemoryQuery(salesQuery);
        return ApiResponse.success(salesStats);
    } catch (Exception e) {
        log.error("实时销售统计查询失败", e);
        return ApiResponse.error("查询失败");
    }
}
/**
 * Returns real-time trend data for a metric at the requested granularity.
 *
 * <p>The granularity name is upper-cased with {@code Locale.ROOT} before the
 * enum lookup, so the lookup does not depend on the server's default locale
 * (for example, under a Turkish locale "minute" would otherwise not map to
 * {@code MINUTE}).
 *
 * @param metric      name of the metric to analyze
 * @param granularity name of a {@code Granularity} constant, case-insensitive
 * @param dimension   optional dimension to break the trend down by
 * @return a success response carrying the trend data, or an error response
 *         if building or running the query fails (including an unknown granularity)
 */
@GetMapping("/trends/realtime")
public ApiResponse<List<TrendData>> getRealtimeTrends(
        @RequestParam String metric,
        @RequestParam String granularity,
        @RequestParam(required = false) String dimension) {
    try {
        // Build the trend query over the default time range.
        TrendQuery query = TrendQuery.builder()
                .metric(metric)
                .granularity(Granularity.valueOf(granularity.toUpperCase(java.util.Locale.ROOT)))
                .dimension(dimension)
                .timeRange(getDefaultTimeRange())
                .build();

        // Run the query.
        List<TrendData> trends = analyticsEngine.executeTrendQuery(query);
        return ApiResponse.success(trends);
    } catch (Exception e) {
        log.error("实时趋势分析查询失败", e);
        return ApiResponse.error("查询失败");
    }
}
/**
 * Runs real-time anomaly detection for a metric over the last hour with a
 * fixed sensitivity of 0.95.
 *
 * <p>The algorithm name is upper-cased with {@code Locale.ROOT} before the
 * enum lookup, so the lookup does not depend on the server's default locale.
 *
 * @param metric    name of the metric to inspect
 * @param algorithm name of an {@code AnomalyAlgorithm} constant, case-insensitive
 * @return a success response carrying the detected anomalies, or an error
 *         response if building or running the query fails (including an unknown algorithm)
 */
@GetMapping("/anomalies/realtime")
public ApiResponse<List<Anomaly>> detectRealtimeAnomalies(
        @RequestParam String metric,
        @RequestParam String algorithm) {
    try {
        // Build the anomaly-detection query.
        AnomalyDetectionQuery query = AnomalyDetectionQuery.builder()
                .metric(metric)
                .algorithm(AnomalyAlgorithm.valueOf(algorithm.toUpperCase(java.util.Locale.ROOT)))
                .sensitivity(0.95)
                .timeWindow(Duration.ofHours(1))
                .build();

        // Run the detection.
        List<Anomaly> anomalies = analyticsEngine.detectAnomalies(query);
        return ApiResponse.success(anomalies);
    } catch (Exception e) {
        log.error("实时异常检测失败", e);
        return ApiResponse.error("检测失败");
    }
}
}
}
百亿级架构实践案例
案例1:全球支付系统
// 全球支付系统架构实现
@Service
@Slf4j
public class GlobalPaymentSystem {
@Autowired
private DistributedTransactionManager transactionManager;
@Autowired
private GlobalDataSynchronizationManager syncManager;
@Autowired
private RealtimeAnalyticsEngine analyticsEngine;
@Autowired
private MultiLevelCacheManager cacheManager;
/**
 * Processes a cross-border payment: runs the payment as a distributed
 * transaction, synchronizes the payment record, records analytics and caches
 * the result.
 *
 * @param request the payment to process
 * @return the result produced by the distributed transaction
 * @throws PaymentProcessingException if any step fails; the original exception is kept as the cause
 */
@Transactional
public PaymentResult processCrossBorderPayment(PaymentRequest request) {
    try {
        log.info("开始处理跨境支付: paymentId={}, from={}, to={}, amount={}",
                request.getPaymentId(), request.getSourceRegion(),
                request.getTargetRegion(), request.getAmount());

        // Describe the distributed transaction: who participates and what runs.
        final DistributedTransaction<PaymentResult> paymentTx =
                new DistributedTransaction<PaymentResult>() {
                    @Override
                    public List<TransactionParticipant> getParticipants() {
                        return buildTransactionParticipants(request);
                    }

                    @Override
                    public PaymentResult execute() {
                        return executePaymentTransaction(request);
                    }
                };
        final PaymentResult outcome = transactionManager.executeDistributedTransaction(paymentTx);

        // Follow-up work: synchronization, analytics, caching.
        syncManager.synchronizeData("payments", outcome.getPaymentId(), SyncOperation.INSERT);
        recordPaymentAnalytics(request, outcome);
        cachePaymentResult(outcome);

        log.info("跨境支付处理成功: paymentId={}", outcome.getPaymentId());
        return outcome;
    } catch (Exception e) {
        log.error("跨境支付处理失败: paymentId={}", request.getPaymentId(), e);
        throw new PaymentProcessingException("Failed to process cross-border payment", e);
    }
}
/**
 * Performs a real-time risk assessment: runs an in-memory risk query,
 * applies the risk rules to the outcome and caches it.
 *
 * <p>If any step fails, the failure is logged and a conservative assessment
 * is returned instead of propagating the exception.
 *
 * @param request the risk-check request
 * @return the computed assessment, or a conservative fallback on failure
 */
public RiskAssessment performRealtimeRiskAssessment(RiskCheckRequest request) {
    try {
        final InMemoryQuery<RiskAssessment> riskQuery =
                InMemoryQuery.<RiskAssessment>builder()
                        .queryType(QueryType.RISK_ASSESSMENT)
                        .filters(buildRiskFilters(request))
                        .timeRange(getRealtimeTimeRange())
                        .build();

        final RiskAssessment verdict = analyticsEngine.executeInMemoryQuery(riskQuery);
        applyRiskRules(verdict, request);
        cacheRiskAssessment(verdict);
        return verdict;
    } catch (Exception e) {
        log.error("实时风控检测失败", e);
        // Degrade to the conservative policy rather than failing the caller.
        return createConservativeRiskAssessment(request);
    }
}
/**
 * Returns the global balance of an account, serving it from the cache when
 * present and otherwise computing it from the regional balances and caching
 * the outcome.
 *
 * @param accountId the account to look up
 * @return the cached or freshly computed global balance
 * @throws AccountQueryException if the lookup fails; the original exception is kept as the cause
 */
public GlobalAccountBalance getGlobalAccountBalance(String accountId) {
    try {
        final String balanceKey = "global:balance:" + accountId;

        // Cache lookup; the loader yields null so a miss is visible here.
        final GlobalAccountBalance hit =
                cacheManager.get(balanceKey, GlobalAccountBalance.class, () -> null);
        if (hit != null) {
            log.debug("全球账户余额缓存命中: accountId={}", accountId);
            return hit;
        }

        // Miss: gather the regional balances, combine them, then cache the total.
        final List<AccountBalance> perRegion = queryRegionalBalances(accountId);
        final GlobalAccountBalance combined = calculateGlobalBalance(accountId, perRegion);
        cacheManager.put(balanceKey, combined);
        return combined;
    } catch (Exception e) {
        log.error("全球账户余额查询失败: accountId={}", accountId, e);
        throw new AccountQueryException("Failed to get global account balance", e);
    }
}
/**
 * Returns real-time transaction statistics for a region over the trailing
 * time window, enriched with trend analysis.
 *
 * <p>The clock is sampled once so the queried range spans exactly
 * {@code timeWindow}; sampling it separately for each endpoint would make
 * the range slightly longer than requested.
 *
 * @param region     the region to report on
 * @param timeWindow length of the trailing window ending now
 * @return the statistics for the window
 * @throws StatisticsException if the query fails; the original exception is kept as the cause
 */
public TransactionStatistics getRealtimeTransactionStatistics(String region, Duration timeWindow) {
    try {
        // 1. Build the statistics query over [now - timeWindow, now].
        Instant now = Instant.now();
        InMemoryQuery<TransactionStatistics> query = InMemoryQuery.<TransactionStatistics>builder()
                .queryType(QueryType.TRANSACTION_STATS)
                .filters(buildTransactionFilters(region))
                .timeRange(new TimeRange(now.minus(timeWindow), now))
                .build();

        // 2. Run the in-memory computation.
        TransactionStatistics stats = analyticsEngine.executeInMemoryQuery(query);

        // 3. Add trend analysis to the statistics.
        addTrendAnalysis(stats, region, timeWindow);
        return stats;
    } catch (Exception e) {
        log.error("实时交易统计查询失败: region={}", region, e);
        throw new StatisticsException("Failed to get transaction statistics", e);
    }
}
/**
 * Processes a list of payments asynchronously in chunks of 100 and returns a
 * future that completes with the merged batch result.
 *
 * <p>The work is scheduled with {@link CompletableFuture#supplyAsync}. The
 * method is deliberately not annotated with {@code @Async}: combining both
 * would occupy an async-executor thread only to hand the work off again, and
 * would wrap failures in an extra layer of exceptions.
 *
 * @param requests the payments to process
 * @return a future completing with the merged result, or completing
 *         exceptionally with a {@code BatchProcessingException} as the cause
 */
public CompletableFuture<BatchPaymentResult> processBatchPayments(List<PaymentRequest> requests) {
    return CompletableFuture.supplyAsync(() -> {
        try {
            log.info("开始批量支付处理: {}笔", requests.size());
            BatchPaymentResult result = BatchPaymentResult.builder()
                    .batchId(generateBatchId())
                    .totalRequests(requests.size())
                    .processedRequests(0)
                    .successfulRequests(0)
                    .failedRequests(0)
                    .build();

            // Process in fixed-size chunks and fold each chunk into the total.
            int batchSize = 100;
            List<List<PaymentRequest>> batches = Lists.partition(requests, batchSize);
            for (List<PaymentRequest> batch : batches) {
                BatchPaymentResult batchResult = processPaymentBatch(batch);
                result.setProcessedRequests(result.getProcessedRequests() + batchResult.getProcessedRequests());
                result.setSuccessfulRequests(result.getSuccessfulRequests() + batchResult.getSuccessfulRequests());
                result.setFailedRequests(result.getFailedRequests() + batchResult.getFailedRequests());
                // NOTE(review): the builder above never sets results; confirm
                // BatchPaymentResult initializes the list by default.
                result.getResults().addAll(batchResult.getResults());
            }

            log.info("批量支付处理完成: batchId={}, success={}, failed={}",
                    result.getBatchId(), result.getSuccessfulRequests(), result.getFailedRequests());
            return result;
        } catch (Exception e) {
            log.error("批量支付处理失败", e);
            throw new BatchProcessingException("Failed to process batch payments", e);
        }
    });
}
/**
 * Benchmarks {@code processCrossBorderPayment} at several concurrency levels
 * and logs total time, throughput and success rate for each level.
 *
 * <p>Failed payments are counted as failures instead of aborting the run:
 * the combined future is awaited without rethrowing, and each individual
 * future is then inspected on its own.
 */
public void performanceBenchmark() {
    log.info("=== 全球支付系统性能基准测试 ===");

    // Concurrency levels to exercise.
    int[] concurrencyLevels = {10, 50, 100, 500, 1000};
    for (int concurrency : concurrencyLevels) {
        log.info("测试并发级别: {}", concurrency);

        // Fire the concurrent requests.
        List<CompletableFuture<PaymentResult>> futures = new ArrayList<>(concurrency);
        long startTime = System.currentTimeMillis();
        for (int i = 0; i < concurrency; i++) {
            PaymentRequest request = PaymentRequest.builder()
                    .paymentId("perf_test_" + i)
                    .sourceRegion("US")
                    .targetRegion("EU")
                    .amount(new BigDecimal("100.00"))
                    .currency("USD")
                    .build();
            futures.add(CompletableFuture.supplyAsync(() -> processCrossBorderPayment(request)));
        }

        // Wait until every request has finished. allOf completes only after all
        // futures are done; exceptionally(...) keeps join() from rethrowing the
        // first failure so the per-request statistics below are still computed.
        CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
                .exceptionally(ignored -> null)
                .join();
        long totalTime = System.currentTimeMillis() - startTime;

        // Count successes; a future that failed (or yielded no result) is a failure.
        long successCount = futures.stream()
                .filter(future -> {
                    try {
                        return future.join().isSuccess();
                    } catch (RuntimeException ignored) {
                        // The payment threw or returned null: not a success.
                        return false;
                    }
                })
                .count();

        // Clamp to 1 ms so a sub-millisecond run does not report infinite throughput.
        double throughput = (concurrency * 1000.0) / Math.max(totalTime, 1L);
        double successRate = (double) successCount / concurrency * 100;
        log.info("并发级别: {}, 总时间: {}ms, 吞吐量: {} TPS, 成功率: {}%",
                concurrency, totalTime, throughput, successRate);
    }
}
}
性能优化与最佳实践
性能优化策略
策略1:查询优化
// 查询优化器
@Component
public class QueryOptimizer {
private static final Logger log = LoggerFactory.getLogger(QueryOptimizer.class);
// 执行计划缓存
private final Cache<String, ExecutionPlan> planCache;
// 统计信息管理器
private final StatisticsManager statsManager;
// 索引选择器
private final IndexSelector indexSelector;
// 查询重写器
private final QueryRewriter queryRewriter;
/**
 * Creates the optimizer with its collaborators and an execution-plan cache
 * holding at most 1000 plans, each expiring 30 minutes after it was written.
 *
 * @param statsManager source of the column statistics used for index selection
 */
public QueryOptimizer(StatisticsManager statsManager) {
    this.statsManager = statsManager;
    // IndexSelector is a member class of this optimizer: it declares no
    // constructor parameters and reads statsManager from the enclosing instance.
    this.indexSelector = new IndexSelector();
    this.queryRewriter = new QueryRewriter();
    this.planCache = Caffeine.newBuilder()
            .maximumSize(1000)
            .expireAfterWrite(Duration.ofMinutes(30))
            .build();
}
/**
 * Optimizes a SQL query. A cached execution plan is reused when one exists
 * for the query signature; otherwise the query is parsed, rewritten, matched
 * with an index, planned, and the new plan is cached.
 *
 * <p>If optimization fails, the failure is logged and the original SQL and
 * parameters are returned unoptimized.
 *
 * @param sql        the SQL text
 * @param parameters the bind parameters
 * @return the optimized query, or the unoptimized query on failure
 */
public OptimizedQuery optimizeQuery(String sql, Object[] parameters) {
    try {
        final String signature = generateQuerySignature(sql, parameters);

        // Fast path: a plan for this signature is already cached.
        final ExecutionPlan knownPlan = planCache.getIfPresent(signature);
        if (knownPlan != null) {
            log.debug("使用缓存的执行计划: signature={}", signature);
            return buildOptimizedQuery(sql, parameters, knownPlan);
        }

        // Slow path: parse -> rewrite -> pick index -> plan.
        final ParsedQuery parsed = parseQuery(sql);
        final RewrittenQuery rewritten = queryRewriter.rewrite(parsed);
        final SelectedIndex chosenIndex = indexSelector.selectOptimalIndex(rewritten);
        final ExecutionPlan freshPlan = generateExecutionPlan(rewritten, chosenIndex);

        // Remember the plan for later calls with the same signature.
        planCache.put(signature, freshPlan);
        log.info("查询优化完成: signature={}, index={}, cost={}",
                signature, chosenIndex.getName(), freshPlan.getEstimatedCost());
        return buildOptimizedQuery(sql, parameters, freshPlan);
    } catch (Exception e) {
        log.error("查询优化失败: sql={}", sql, e);
        // Fall back to the query as given.
        return new OptimizedQuery(sql, parameters);
    }
}
/**
 * Index selection strategy: evaluates every candidate index for a query and
 * picks the one with the lowest estimated cost, falling back to a table scan
 * when there is no candidate.
 */
public class IndexSelector {

    /**
     * Chooses the cheapest candidate index for the query.
     *
     * @param query the rewritten query
     * @return the selected index with its access path, or a selection with a
     *         {@code null} index and {@code AccessPath.TABLE_SCAN} when no
     *         candidate exists
     */
    public SelectedIndex selectOptimalIndex(RewrittenQuery query) {
        final List<IndexInfo> candidates = getCandidateIndexes(query);

        // Cost every candidate up front, then keep the cheapest.
        final List<IndexEvaluation> scored = new ArrayList<>();
        candidates.forEach(candidate -> scored.add(evaluateIndex(query, candidate)));

        final IndexEvaluation cheapest = scored.stream()
                .min(Comparator.comparing(IndexEvaluation::getEstimatedCost))
                .orElse(null);

        // No candidate at all: fall back to scanning the table.
        if (cheapest == null) {
            return new SelectedIndex(null, AccessPath.TABLE_SCAN);
        }
        return new SelectedIndex(cheapest.getIndex(), cheapest.getAccessPath());
    }

    /**
     * Scores one index for the query. The estimated cost is the access cost
     * plus the lookup cost.
     */
    private IndexEvaluation evaluateIndex(RewrittenQuery query, IndexInfo index) {
        final double selectivity = calculateSelectivity(query, index);
        final double coverage = calculateCoverage(query, index);
        final double accessCost = calculateAccessCost(query, index, selectivity);
        final double lookupCost = calculateLookupCost(query, index, coverage);

        return IndexEvaluation.builder()
                .index(index)
                .selectivity(selectivity)
                .coverage(coverage)
                .accessCost(accessCost)
                .lookupCost(lookupCost)
                .estimatedCost(accessCost + lookupCost)
                .build();
    }

    /**
     * Estimates the selectivity of the index column for the query: 0.1 when
     * no statistics exist for the column, 1.0 when the query has no condition
     * on it, otherwise the condition's own estimate.
     */
    private double calculateSelectivity(RewrittenQuery query, IndexInfo index) {
        final ColumnStatistics columnStats = statsManager.getColumnStatistics(index.getColumnName());
        if (columnStats == null) {
            return 0.1; // default selectivity without statistics
        }

        final QueryCondition predicate = query.getCondition(index.getColumnName());
        return (predicate == null) ? 1.0 : predicate.estimateSelectivity(columnStats);
    }
}
}
策略2:连接池优化
// 智能连接池管理器
@Component
public class SmartConnectionPoolManager {
private static final Logger log = LoggerFactory.getLogger(SmartConnectionPoolManager.class);
// 连接池映射
private final Map<String, HikariDataSource> connectionPools;
// 连接池配置管理器
private final ConnectionPoolConfigManager configManager;
// 性能监控器
private final ConnectionPoolMonitor monitor;
// 动态调整器
private final DynamicPoolAdjuster adjuster;
/**
 * Creates the manager with an empty pool registry, a fresh monitor and
 * adjuster, and then initializes the connection pools.
 *
 * @param configManager provider of the per-pool configuration
 */
public SmartConnectionPoolManager(ConnectionPoolConfigManager configManager) {
    // Collaborators first; none of them depends on another.
    this.monitor = new ConnectionPoolMonitor();
    this.adjuster = new DynamicPoolAdjuster();
    this.connectionPools = new ConcurrentHashMap<>();
    this.configManager = configManager;

    // Pools are created last, once every field is assigned.
    initializeConnectionPools();
}
/**
 * Obtains a connection from the named pool, records the acquisition with the
 * monitor and returns the connection wrapped for usage monitoring.
 *
 * @param dataSourceName name of the registered data source
 * @return a monitored wrapper around the pooled connection
 * @throws IllegalArgumentException if no data source is registered under the name
 * @throws SQLException if the pool cannot supply a connection
 */
public Connection getConnection(String dataSourceName) throws SQLException {
    final HikariDataSource pool = connectionPools.get(dataSourceName);
    if (pool == null) {
        throw new IllegalArgumentException("Data source not found: " + dataSourceName);
    }

    final Connection raw = pool.getConnection();
    monitor.recordConnectionAcquired(dataSourceName);
    return new MonitoredConnection(raw, dataSourceName, monitor);
}
/**
 * Periodically (every 60 seconds) reviews every registered pool and resizes
 * it when the adjuster decides a change is needed.
 *
 * <p>Each pool is handled independently: a failure while analyzing or
 * resizing one pool is logged and does not prevent the remaining pools from
 * being reviewed.
 */
@Scheduled(fixedRate = 60000) // check once per minute
public void adjustConnectionPools() {
    for (Map.Entry<String, HikariDataSource> entry : connectionPools.entrySet()) {
        String poolName = entry.getKey();
        HikariDataSource pool = entry.getValue();
        try {
            // NOTE(review): nothing visible in this class calls monitor.updateMetrics;
            // confirm the metrics read here are refreshed elsewhere.
            PoolMetrics metrics = monitor.getMetrics(poolName);

            // Ask the adjuster whether this pool should be resized.
            AdjustmentDecision decision = adjuster.analyze(metrics);
            if (decision.shouldAdjust()) {
                adjustPoolSize(pool, decision.getNewSize());
                log.info("连接池动态调整: pool={}, oldSize={}, newSize={}, reason={}",
                        poolName, metrics.getCurrentSize(), decision.getNewSize(),
                        decision.getReason());
            }
        } catch (Exception e) {
            // Isolate the failure to this pool and continue with the others.
            log.error("连接池动态调整失败: pool={}", poolName, e);
        }
    }
}
/**
 * Tracks per-pool metrics and a per-pool counter of connections that have
 * been acquired and not yet released.
 */
public class ConnectionPoolMonitor {
    private final Map<String, PoolMetrics> metricsMap;
    private final Map<String, AtomicLong> connectionCounters;

    public ConnectionPoolMonitor() {
        this.metricsMap = new ConcurrentHashMap<>();
        this.connectionCounters = new ConcurrentHashMap<>();
    }

    /** Increments the outstanding-connection counter of the pool. */
    public void recordConnectionAcquired(String poolName) {
        connectionCounters.computeIfAbsent(poolName, k -> new AtomicLong(0))
                .incrementAndGet();
    }

    /** Decrements the outstanding-connection counter of the pool. */
    public void recordConnectionReleased(String poolName) {
        connectionCounters.computeIfAbsent(poolName, k -> new AtomicLong(0))
                .decrementAndGet();
    }

    /**
     * Returns the metrics object of the pool, creating it on first access.
     *
     * @param poolName name of the pool
     * @return the (possibly newly created) metrics for the pool
     */
    public PoolMetrics getMetrics(String poolName) {
        return metricsMap.computeIfAbsent(poolName, k -> new PoolMetrics(poolName));
    }

    /**
     * Copies the current pool figures from the MXBean into the pool's
     * metrics, derives the usage rate and records a history sample.
     *
     * @param poolName   name of the pool
     * @param poolMXBean live pool statistics
     */
    public void updateMetrics(String poolName, HikariPoolMXBean poolMXBean) {
        PoolMetrics metrics = getMetrics(poolName);

        // Read each figure once so the stored values and the rate agree.
        int active = poolMXBean.getActiveConnections();
        int total = poolMXBean.getTotalConnections();
        metrics.setActiveConnections(active);
        metrics.setIdleConnections(poolMXBean.getIdleConnections());
        metrics.setTotalConnections(total);
        metrics.setThreadsAwaitingConnection(poolMXBean.getThreadsAwaitingConnection());

        // A pool without connections is 0% used; dividing would yield NaN,
        // which would then contaminate every average built from the history.
        double usageRate = total > 0 ? (double) active / total : 0.0;
        metrics.setUsageRate(usageRate);

        // Keep a sample for trend analysis.
        metrics.recordHistory();
    }
}
/**
 * Decides whether a pool should grow or shrink based on its current usage
 * rate, its usage history and the number of threads waiting for a connection.
 */
public class DynamicPoolAdjuster {

    /**
     * Analyzes the metrics of one pool.
     *
     * <p>Rules, checked in order:
     * <ol>
     *   <li>usage &gt; 0.9 and average usage &gt; 0.8: double the size, capped at the configured maximum;</li>
     *   <li>usage &lt; 0.2 and average usage &lt; 0.3: halve the size, floored at the configured minimum;</li>
     *   <li>more than 10 waiting threads: add 10, capped at the configured maximum.</li>
     * </ol>
     * A rule only yields an adjustment when the clamped size actually moves
     * in the intended direction; a pool already at its limit gets
     * "no adjustment" rather than a no-op resize.
     *
     * @param metrics current metrics of the pool
     * @return the adjustment decision
     */
    public AdjustmentDecision analyze(PoolMetrics metrics) {
        String poolName = metrics.getPoolName();

        // 1. Current usage and size.
        double usageRate = metrics.getUsageRate();
        int currentSize = metrics.getTotalConnections();

        // 2. Historical trend; fall back to the current rate without history.
        List<Double> usageHistory = metrics.getUsageHistory();
        double avgUsage = usageHistory.stream()
                .mapToDouble(Double::doubleValue)
                .average()
                .orElse(usageRate);

        // 3. Decision rules.
        if (usageRate > 0.9 && avgUsage > 0.8) {
            // Sustained high usage: grow.
            int newSize = Math.min(currentSize * 2, configManager.getMaxPoolSize(poolName));
            return newSize > currentSize
                    ? AdjustmentDecision.increase(newSize, "High usage rate: " + usageRate)
                    : AdjustmentDecision.noAdjustment();
        } else if (usageRate < 0.2 && avgUsage < 0.3) {
            // Sustained low usage: shrink.
            int newSize = Math.max(currentSize / 2, configManager.getMinPoolSize(poolName));
            return newSize < currentSize
                    ? AdjustmentDecision.decrease(newSize, "Low usage rate: " + usageRate)
                    : AdjustmentDecision.noAdjustment();
        } else if (metrics.getThreadsAwaitingConnection() > 10) {
            // Many threads are waiting for a connection: grow.
            int newSize = Math.min(currentSize + 10, configManager.getMaxPoolSize(poolName));
            return newSize > currentSize
                    ? AdjustmentDecision.increase(newSize, "High waiting threads: "
                            + metrics.getThreadsAwaitingConnection())
                    : AdjustmentDecision.noAdjustment();
        }

        // Nothing to change.
        return AdjustmentDecision.noAdjustment();
    }
}
}
容量规划与扩展策略
// 容量规划服务
@Service
public class CapacityPlanningService {
private static final Logger log = LoggerFactory.getLogger(CapacityPlanningService.class);
// 容量计算器
private final CapacityCalculator capacityCalculator;
// 性能预测器
private final PerformancePredictor performancePredictor;
// 资源监控器
private final ResourceMonitor resourceMonitor;
// 扩展决策器
private final ScalingDecisionMaker decisionMaker;
/**
 * Creates the service from its injected collaborators and a freshly
 * constructed scaling decision maker.
 *
 * @param capacityCalculator   calculator for capacity figures
 * @param performancePredictor predictor for performance figures
 * @param resourceMonitor      source of system, performance and business metrics
 */
public CapacityPlanningService(
        CapacityCalculator capacityCalculator,
        PerformancePredictor performancePredictor,
        ResourceMonitor resourceMonitor) {
    // Created locally rather than injected.
    this.decisionMaker = new ScalingDecisionMaker();

    // Injected collaborators.
    this.resourceMonitor = resourceMonitor;
    this.performancePredictor = performancePredictor;
    this.capacityCalculator = capacityCalculator;
}
/**
 * Produces a capacity plan for the given requirements. The steps are: assess
 * current capacity, forecast growth, analyze performance requirements,
 * compute the capacity gap, design scaling solutions, analyze their cost and
 * benefit, assess their risks and assemble everything into a plan.
 *
 * @param requirements the capacity requirements to plan for
 * @return the assembled capacity plan
 * @throws CapacityPlanningException if any step fails; the original exception is kept as the cause
 */
public CapacityPlan analyzeCapacityRequirements(CapacityRequirements requirements) {
    try {
        log.info("开始容量规划分析: requirements={}", requirements);

        // Where we are and where the business is heading.
        final CurrentCapacity baseline = assessCurrentCapacity();
        final GrowthForecast growth = predictBusinessGrowth(requirements);
        final PerformanceRequirements performanceNeeds = analyzePerformanceRequirements(requirements);

        // What is missing and how to close the gap.
        final CapacityGap shortfall = calculateCapacityGap(baseline, growth, performanceNeeds);
        final List<ScalingSolution> options = designScalingSolutions(shortfall);

        // Evaluate the options.
        final CostBenefitAnalysis economics = analyzeCostBenefit(options);
        final RiskAssessment risks = assessRisks(options);

        // Assemble the plan.
        final CapacityPlan capacityPlan = CapacityPlan.builder()
                .currentCapacity(baseline)
                .growthForecast(growth)
                .capacityGap(shortfall)
                .recommendedSolutions(options)
                .costBenefitAnalysis(economics)
                .riskAssessment(risks)
                .implementationTimeline(createImplementationTimeline(options))
                .monitoringStrategy(createMonitoringStrategy())
                .build();

        log.info("容量规划分析完成: plan={}", capacityPlan);
        return capacityPlan;
    } catch (Exception e) {
        log.error("容量规划分析失败", e);
        throw new CapacityPlanningException("Failed to analyze capacity requirements", e);
    }
}
/**
* Automatic scaling decision.
*
* Collects the current system, performance and business metrics from the
* resource monitor and starts assembling a ScalingAnalysis from them.
*
* NOTE(review): this snippet is truncated in the source. The builder chain
* breaks off in the middle of the queueDepth(...) argument, and the rest of
* the method (remaining builder calls, the decision itself, the catch block
* and the return statement) is missing. It does not compile as shown and
* must be restored from the original code.
*/
public ScalingDecision makeAutoScalingDecision() {
try {
// 1. Collect the current metrics
SystemMetrics currentMetrics = resourceMonitor.getCurrentMetrics();
PerformanceMetrics perfMetrics = resourceMonitor.getPerformanceMetrics();
BusinessMetrics businessMetrics = resourceMonitor.getBusinessMetrics();
// 2. Analyze the scaling need
ScalingAnalysis analysis = ScalingAnalysis.builder()
.cpuUsage(currentMetrics.getCpuUsage())
.memoryUsage(currentMetrics.getMemoryUsage())
.diskUsage(currentMetrics.getDiskUsage())
.networkUsage(currentMetrics.getNetworkUsage())
.responseTime(perfMetrics.getAverageResponseTime())
.throughput(perfMetrics.getThroughput())
.errorRate(perfMetrics.getErrorRate())
.queueDepth(businessMetrics.get
// NOTE(review): the source ends here; the call above is incomplete.
}
}
/**
 * Capacity planning settings bound from configuration properties under the
 * {@code capacity.planning} prefix.
 */
@Configuration
@ConfigurationProperties(prefix = "capacity.planning")
@Data
public class CapacityPlanningProperties {
    // Auto-scaling settings.
    private boolean autoScalingEnabled = true;
    private int scalingCheckInterval = 300; // 5 minutes
    private double scaleOutThreshold = 0.8;
    private double scaleInThreshold = 0.3;
    private int maxScalingInstances = 10;
    private int minScalingInstances = 2;

    // Alerting settings.
    private AlertConfig alert = new AlertConfig();

    // Monitoring settings.
    private MonitoringConfig monitoring = new MonitoringConfig();

    /** Alerting settings: on/off switch, delivery targets and cooldown. */
    @Data
    public static class AlertConfig {
        private boolean enabled = true;
        private String webhookUrl;
        // Defaults to an empty list so consumers can iterate without a null
        // check when no recipients are configured.
        private List<String> emailRecipients = new ArrayList<>();
        private Duration alertCooldown = Duration.ofMinutes(15);
    }

    /** Monitoring settings: on/off switch, retention, interval and metric names. */
    @Data
    public static class MonitoringConfig {
        private boolean enabled = true;
        private int metricsRetentionDays = 30;
        private int checkInterval = 60; // 1 minute
        private List<String> monitoredMetrics = Arrays.asList(
                "cpu_usage", "memory_usage", "disk_usage", "network_usage",
                "response_time", "throughput", "error_rate", "queue_depth"
        );
    }
}
总结
海量结构化数据扩展架构法则是现代大规模系统设计的核心原则之一。通过深入理解分布式架构的本质,结合合理的数据分片策略、读写分离机制、缓存层设计和一致性保障机制,我们能够构建出既能支撑海量数据存储、又能提供高性能访问的可扩展架构。
核心原则
- 水平扩展优先:通过增加节点而非升级单机配置来实现扩展
- 数据分片策略:根据业务特征选择合适的分片算法和分片键
- 读写分离架构:通过主从复制和读写分离提升查询性能
- 多级缓存设计:构建本地缓存、分布式缓存和数据库查询缓存
- 一致性保障:在分布式环境下保证数据的最终一致性
- 容量规划管理:提前规划系统容量,支持业务持续增长
关键技术
- 分布式事务:使用2PC、Saga等模式保证跨节点事务一致性
- 数据同步:实现跨区域、跨数据中心的数据同步机制
- 实时分析:通过流处理引擎实现实时数据分析和监控
- 性能优化:查询优化、连接池优化、缓存优化等多维度优化
- 容量规划:基于业务增长预测和性能监控的动态容量规划
- 自动化运维:自动扩展、故障自愈、智能监控等运维能力
成功要素
- 深入理解业务:分析数据特征、访问模式和增长趋势
- 科学架构设计:根据实际场景选择最适合的架构模式
- 渐进式演进:从单体到分布式,逐步演进架构
- 持续性能优化:建立完善的监控体系,持续优化性能
- 容量提前规划:基于业务预测提前规划系统容量
- 团队能力建设:培养团队的分布式系统设计和运维能力
架构演进路径
最佳实践建议
1. 数据建模最佳实践
- 选择合适的分片键:高基数、均匀分布、业务相关
- 避免跨分片查询:尽量将相关数据放在同一分片
- 预留扩展空间:分片数量要预留未来扩展空间
- 考虑数据热点:避免某些分片成为访问热点
2. 性能优化最佳实践
- 索引优化:为分片键和查询条件创建复合索引
- 查询优化:避免全表扫描,使用覆盖索引
- 缓存策略:多级缓存,缓存预热,缓存穿透保护
- 连接池优化:动态调整连接池大小,连接复用
3. 运维管理最佳实践
- 监控告警:全方位监控,及时预警,快速定位
- 自动化运维:自动扩展,故障自愈,智能调度
- 容量规划:基于业务增长的容量预测和规划
- 灾备演练:定期进行灾备演练,验证恢复能力
4. 安全合规最佳实践
- 数据加密:传输加密,存储加密,字段级加密
- 访问控制:细粒度权限控制,审计日志
- 数据脱敏:敏感数据脱敏处理
- 合规审计:满足行业合规要求
常见陷阱与避免方法
| 陷阱类型 | 具体表现 | 避免方法 |
|---|---|---|
| 分片键选择不当 | 数据分布不均,热点问题 | 选择高基数、均匀分布的字段 |
| 跨分片事务 | 性能下降,复杂度增加 | 尽量避免跨分片事务;无法避免时采用最终一致性方案(如Saga) |
| 过度设计 | 架构复杂,维护困难 | 根据实际需求,渐进式演进 |
| 忽视监控 | 问题发现晚,定位困难 | 建立完善的监控告警体系 |
| 容量规划不足 | 扩展不及时,影响业务 | 基于业务增长提前规划 |
技术选型建议
亿级规模技术选型
- 数据库:MySQL + 分库分表中间件(ShardingSphere)
- 缓存:Redis Cluster + 本地缓存(Caffeine)
- 消息队列:Apache Kafka + RocketMQ
- 监控:Prometheus + Grafana + ELK
百亿级规模技术选型
- 数据库:分布式NewSQL(TiDB、OceanBase)
- 缓存:Redis Cluster + Memcached
- 流处理:Apache Flink + Spark Streaming
- 监控:自定义监控平台 + APM工具
1011

被折叠的 条评论
为什么被折叠?



