文章目录
1. Spring Boot Actuator:生产监控的基石
1.1 Actuator 架构设计
Spring Boot Actuator 采用了分层架构设计:
应用层 (Application Layer)
↓
Actuator 端点层 (Endpoint Layer)
↓
技术指标收集层 (Metrics Collection Layer)
↓
外部系统集成层 (External Systems Integration)
核心组件关系图:
Spring Boot Application
├── HealthEndpoint ← HealthIndicator(s)
├── MetricsEndpoint ← MeterRegistry ← MeterBinders
├── InfoEndpoint ← InfoContributor(s)
├── LoggersEndpoint ← LoggingSystem
├── ConfigurationPropertiesEndpoint
└── Custom Endpoints
1.2 Actuator 的完整依赖配置
<!-- Core Actuator dependency -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Monitoring system integration (Micrometer registries) -->
<dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-registry-prometheus</artifactId>
</dependency>
<dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-registry-influx</artifactId>
</dependency>
<!-- Distributed tracing.
     The original snippet repeated spring-boot-starter-actuator here, which adds no tracing support.
     NOTE(review): this assumes Spring Boot 3.x (Micrometer Tracing); on Spring Boot 2.x use
     org.springframework.cloud:spring-cloud-starter-sleuth instead - confirm the target version. -->
<dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-tracing-bridge-brave</artifactId>
</dependency>
1.3 端点安全配置策略
# Actuator endpoint exposure and security settings.
# Nesting restored: without indentation the keys collapse into one flat mapping with duplicates.
management:
  endpoints:
    web:
      exposure:
        # Endpoints reachable over HTTP.
        include: "health,info,metrics,prometheus"
        # Explicitly excluded from HTTP exposure.
        exclude: "env,configprops"
      # Non-default base path for the web endpoints.
      base-path: "/internal/actuator"
    jmx:
      exposure:
        include: "*"
  endpoint:
    health:
      enabled: true
      # Component and detail output is only shown to authorized callers.
      show-details: "when_authorized"
      show-components: "when_authorized"
      roles: "ACTUATOR_ADMIN"
    shutdown:
      enabled: false
  server:
    # Separate management port, bound to the loopback address only.
    port: 8081
    address: "127.0.0.1"
    ssl:
      enabled: true
      key-store: "classpath:keystore.p12"
      # Key store password is taken from the SSL_KEYSTORE_PASSWORD placeholder, not hard-coded.
      key-store-password: "${SSL_KEYSTORE_PASSWORD}"
2. 健康检查:多层次深度监控
2.1 健康检查的完整架构
健康检查请求流程:
HTTP Request → HealthEndpoint → CompositeHealth → HealthIndicator(s)
↓
响应构建 ← 状态聚合 ← 组件检查 ← 详细信息收集
2.2 内置健康指示器详解
Spring Boot 提供了丰富的内置健康指示器:
| 健康指示器 | 检查内容 | 配置属性 |
|---|---|---|
| DataSourceHealthIndicator | 数据库连接和查询 | spring.datasource.* |
| RedisHealthIndicator | Redis 连接和 ping | spring.redis.* |
| MongoHealthIndicator | MongoDB 连接和统计 | spring.data.mongodb.* |
| DiskSpaceHealthIndicator | 磁盘空间和阈值 | management.health.diskspace.* |
| ElasticsearchHealthIndicator | ES 集群健康状态 | spring.elasticsearch.* |
| RabbitHealthIndicator | RabbitMQ 连接和状态 | spring.rabbitmq.* |
| CassandraHealthIndicator | Cassandra 连接和查询 | spring.data.cassandra.* |
2.3 自定义健康指示器深度实现
复杂数据库健康检查:
/**
 * Health indicator that runs several database probes and aggregates them into one result.
 *
 * <p>The probes are: a basic {@code SELECT 1} round trip, a row count on each critical table,
 * server-level metrics, connection pool utilization (HikariCP only) and selected server
 * variables. Several queries use MySQL-specific syntax.
 *
 * <p>Resulting status:
 * <ul>
 *   <li>{@code UP} - no probe reported a problem;</li>
 *   <li>{@code DOWN} - the basic connection probe failed;</li>
 *   <li>{@code DEGRADED} - the basic probe succeeded but another probe reported a problem.</li>
 * </ul>
 */
@Component
@Slf4j
public class ComprehensiveDatabaseHealthIndicator implements HealthIndicator {

    /** Tables that are counted on every health check. */
    private static final List<String> CRITICAL_TABLES = List.of("users", "orders", "products");

    /** Basic-probe response time (ms) above which a problem is reported. */
    private static final long SLOW_RESPONSE_THRESHOLD_MS = 1000;

    private final DataSource dataSource;
    private final JdbcTemplate jdbcTemplate;
    private final ObjectMapper objectMapper;

    // Health check configuration
    @Value("${app.health.database.timeout:5}")
    private int queryTimeoutSeconds;

    @Value("${app.health.database.connection-pool-threshold:0.8}")
    private double connectionPoolThreshold;

    /**
     * Creates the indicator and a dedicated {@link JdbcTemplate} on top of the data source.
     *
     * @param dataSource   data source to probe
     * @param objectMapper JSON mapper (stored, not used by the probes in this class)
     */
    public ComprehensiveDatabaseHealthIndicator(DataSource dataSource,
                                                ObjectMapper objectMapper) {
        this.dataSource = dataSource;
        this.jdbcTemplate = new JdbcTemplate(dataSource);
        this.objectMapper = objectMapper;
        // The query timeout is NOT applied here: @Value fields are injected after the
        // constructor returns, so queryTimeoutSeconds would still be 0 at this point.
    }

    /**
     * Runs all probes and builds the aggregated health result.
     *
     * @return the aggregated health, including per-probe details and an {@code errors} list
     *         when at least one probe reported a problem
     */
    @Override
    public Health health() {
        // Applied on every call so the injected value is always the one in effect.
        jdbcTemplate.setQueryTimeout(queryTimeoutSeconds);

        Map<String, Object> healthDetails = new LinkedHashMap<>();
        List<String> errors = new ArrayList<>();
        boolean connectionUp = false;
        try {
            // 1. Basic connection test
            connectionUp = testBasicConnection(healthDetails, errors);
            // 2. Critical business table check
            checkCriticalTables(healthDetails, errors);
            // 3. Database performance metrics
            collectPerformanceMetrics(healthDetails, errors);
            // 4. Connection pool status
            checkConnectionPool(healthDetails, errors);
            // 5. Database configuration
            checkDatabaseConfiguration(healthDetails, errors);
        } catch (Exception e) {
            log.error("数据库健康检查异常", e);
            errors.add("健康检查执行失败: " + e.getMessage());
        }
        return buildHealthResponse(healthDetails, errors, connectionUp);
    }

    /**
     * Executes {@code SELECT 1} and records status, response time and result.
     *
     * @return {@code true} if the query succeeded, {@code false} if it threw
     */
    private boolean testBasicConnection(Map<String, Object> details, List<String> errors) {
        try {
            long startNanos = System.nanoTime();
            Integer result = jdbcTemplate.queryForObject("SELECT 1", Integer.class);
            long responseTime = (System.nanoTime() - startNanos) / 1_000_000;

            // LinkedHashMap rather than Map.of: tolerates a null query result and keeps key order.
            Map<String, Object> probe = new LinkedHashMap<>();
            probe.put("status", "UP");
            probe.put("responseTimeMs", responseTime);
            probe.put("testResult", result);
            details.put("basicConnection", probe);

            if (responseTime > SLOW_RESPONSE_THRESHOLD_MS) {
                errors.add("数据库响应时间过长: " + responseTime + "ms");
            }
            return true;
        } catch (Exception e) {
            details.put("basicConnection", Map.of("status", "DOWN"));
            errors.add("基础连接测试失败: " + e.getMessage());
            return false;
        }
    }

    /** Counts rows in each critical table; a failing query marks the table as missing. */
    private void checkCriticalTables(Map<String, Object> details, List<String> errors) {
        Map<String, Object> tableStatus = new LinkedHashMap<>();
        for (String table : CRITICAL_TABLES) {
            try {
                // Table names come from the fixed constant above, never from external input.
                Long count = jdbcTemplate.queryForObject(
                        "SELECT COUNT(*) FROM " + table, Long.class);
                Map<String, Object> status = new LinkedHashMap<>();
                status.put("exists", true);
                status.put("recordCount", count);
                tableStatus.put(table, status);
            } catch (Exception e) {
                tableStatus.put(table, Map.of("exists", false));
                errors.add("关键表检查失败: " + table + " - " + e.getMessage());
            }
        }
        details.put("criticalTables", tableStatus);
    }

    /** Collects server version, process count and per-schema size (MySQL syntax). */
    private void collectPerformanceMetrics(Map<String, Object> details, List<String> errors) {
        try {
            // Database version
            String version = jdbcTemplate.queryForObject(
                    "SELECT VERSION()", String.class);
            // Active connection count (MySQL example)
            Integer activeConnections = jdbcTemplate.queryForObject(
                    "SELECT COUNT(*) FROM information_schema.PROCESSLIST", Integer.class);
            // Database size (MySQL example). The query yields one row per schema, so it must be
            // read with queryForList; queryForMap requires exactly one row and throws otherwise.
            List<Map<String, Object>> dbSize = jdbcTemplate.queryForList(
                    "SELECT table_schema as 'database', " +
                    "ROUND(SUM(data_length + index_length) / 1024 / 1024, 2) as 'size_mb' " +
                    "FROM information_schema.TABLES " +
                    "GROUP BY table_schema"
            );

            Map<String, Object> performance = new LinkedHashMap<>();
            performance.put("databaseVersion", version);
            performance.put("activeConnections", activeConnections);
            performance.put("databaseSize", dbSize);
            details.put("performance", performance);
        } catch (Exception e) {
            errors.add("性能指标收集失败: " + e.getMessage());
        }
    }

    /**
     * Records HikariCP pool usage and reports a problem when utilization exceeds the
     * configured threshold. Does nothing for other data source types.
     */
    private void checkConnectionPool(Map<String, Object> details, List<String> errors) {
        if (!(dataSource instanceof HikariDataSource)) {
            return;
        }
        HikariPoolMXBean pool = ((HikariDataSource) dataSource).getHikariPoolMXBean();
        if (pool == null) {
            // The MXBean is unavailable until the pool has been started.
            return;
        }

        int activeConnections = pool.getActiveConnections();
        int totalConnections = pool.getTotalConnections();
        int idleConnections = pool.getIdleConnections();
        double utilization = totalConnections > 0
                ? (double) activeConnections / totalConnections
                : 0.0;

        Map<String, Object> poolDetails = new LinkedHashMap<>();
        poolDetails.put("activeConnections", activeConnections);
        poolDetails.put("idleConnections", idleConnections);
        poolDetails.put("totalConnections", totalConnections);
        poolDetails.put("utilization", String.format("%.2f%%", utilization * 100));
        details.put("connectionPool", poolDetails);

        if (utilization > connectionPoolThreshold) {
            errors.add(String.format(
                    "数据库连接池使用率过高: %.2f%% (阈值: %.2f%%)",
                    utilization * 100, connectionPoolThreshold * 100
            ));
        }
    }

    /** Reads selected server variables (MySQL {@code SHOW VARIABLES}). */
    private void checkDatabaseConfiguration(Map<String, Object> details, List<String> errors) {
        try {
            List<Map<String, Object>> dbConfig = jdbcTemplate.queryForList(
                    "SHOW VARIABLES WHERE Variable_name IN " +
                    "('max_connections', 'innodb_buffer_pool_size', 'wait_timeout')"
            );
            details.put("configuration", dbConfig);
        } catch (Exception e) {
            errors.add("数据库配置检查失败: " + e.getMessage());
        }
    }

    /**
     * Turns the collected details and errors into the final {@link Health}.
     *
     * @param details      per-probe details, attached unchanged
     * @param errors       problem messages; empty means fully healthy
     * @param connectionUp outcome of the basic connection probe; decides DOWN vs DEGRADED.
     *                     The status is driven by this flag because the error messages
     *                     themselves never contain the text "DOWN".
     */
    private Health buildHealthResponse(Map<String, Object> details, List<String> errors,
                                       boolean connectionUp) {
        Health.Builder builder = Health.up().withDetails(details);
        if (errors.isEmpty()) {
            return builder.build();
        }
        builder.withDetail("errors", errors);
        return connectionUp
                ? builder.status("DEGRADED").build()
                : builder.down().build();
    }
}
2.4 健康检查分组与聚合
# Health endpoint groups.
# Nesting restored: without indentation the group definitions are not attached to
# management.endpoint.health.group and the repeated keys collide.
management:
  endpoint:
    health:
      show-details: "when_authorized"
      show-components: "when_authorized"
      group:
        readiness:
          include: "db,redis,diskSpace,externalServices"
          show-details: "always"
          # Also served on the main server port under /health/readiness.
          additional-path: "server:/health/readiness"
        liveness:
          include: "ping,memory"
          show-details: "never"
          # Also served on the main server port under /health/liveness.
          additional-path: "server:/health/liveness"
        external:
          include: "paymentService,emailService,smsService"
          show-details: "when_authorized"
自定义健康检查组:
/**
 * Registers a composite health contributor that bundles the external-service indicators
 * (payment, email, sms) under one parent entry.
 */
@Configuration
public class HealthGroupConfiguration {

    /**
     * Builds the composite contributor for external services.
     *
     * <p>The condition is scoped to this bean's name. A bare {@code @ConditionalOnMissingBean}
     * would match on the return type {@code HealthContributor} and back off as soon as any
     * other health contributor bean is registered.
     *
     * <p>NOTE(review): the readiness group in the YAML above includes {@code externalServices};
     * confirm that this bean's name resolves to that contributor name.
     *
     * @return composite contributor with the children {@code payment}, {@code email}, {@code sms}
     */
    @Bean
    @ConditionalOnMissingBean(name = "externalServicesHealthGroup")
    public HealthContributor externalServicesHealthGroup() {
        Map<String, HealthContributor> contributors = new HashMap<>();
        contributors.put("payment", paymentServiceHealthIndicator());
        contributors.put("email", emailServiceHealthIndicator());
        contributors.put("sms", smsServiceHealthIndicator());
        // CompositeHealthContributor is an interface; instances come from its static factory.
        return CompositeHealthContributor.fromMap(contributors);
    }

    /** Health indicator for the payment service. */
    @Bean
    public PaymentServiceHealthIndicator paymentServiceHealthIndicator() {
        return new PaymentServiceHealthIndicator();
    }

    // Other health indicators (email, sms) are elided in the article...
}
10万+

被折叠的 条评论
为什么被折叠?



