Spring Boot监控与运维详解

1. 监控与运维概述

监控与运维是确保Spring Boot应用稳定运行的关键环节。通过完善的监控体系,可以实时了解应用状态、性能指标和异常情况,为运维决策提供数据支持。

1.1 监控体系

  • 应用监控:应用性能、响应时间、吞吐量
  • 系统监控:CPU、内存、磁盘、网络
  • 业务监控:业务指标、用户行为、交易量
  • 日志监控:错误日志、访问日志、审计日志

1.2 监控工具

  • Spring Boot Actuator:内置监控端点
  • Micrometer:指标收集框架
  • Prometheus:指标存储和查询
  • Grafana:可视化监控面板
  • ELK Stack:日志分析平台

1.3 核心依赖

<dependencies>
    <!-- Spring Boot Actuator: supplies the management endpoints configured under `management.*`. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>
    
    <!-- Micrometer Prometheus registry: publishes the collected meters in Prometheus format.
         NOTE(review): no <version> is declared; presumably resolved by the Spring Boot
         parent/BOM. Confirm against the project's dependency management. -->
    <dependency>
        <groupId>io.micrometer</groupId>
        <artifactId>micrometer-registry-prometheus</artifactId>
    </dependency>
    
    <!-- Spring Boot Admin client.
         NOTE(review): no <version> is declared and this is a third-party (de.codecentric)
         artifact; it likely needs an explicit version or the spring-boot-admin BOM. Confirm. -->
    <dependency>
        <groupId>de.codecentric</groupId>
        <artifactId>spring-boot-admin-starter-client</artifactId>
    </dependency>
    
    <!-- Logstash Logback Encoder: provides the JSON encoder class
         (net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder) used in logback-spring.xml.
         NOTE(review): no <version> is declared; likely needs an explicit version. Confirm. -->
    <dependency>
        <groupId>net.logstash.logback</groupId>
        <artifactId>logstash-logback-encoder</artifactId>
    </dependency>
</dependencies>

2. Spring Boot Actuator

2.1 基础配置

# application.yml
# Actuator settings for the demo: everything is exposed and health details are always shown.
management:
  endpoints:
    web:
      exposure:
        # "*" exposes every Actuator endpoint over HTTP.
        # NOTE(review): section 9.1 narrows this to health,info,metrics,prometheus;
        # prefer that list outside of local development.
        include: "*"
      # URL prefix under which the endpoints are served (the ops script calls /actuator/health).
      base-path: /actuator
  endpoint:
    health:
      # Health details are returned to every caller; section 9.1 uses when-authorized instead.
      show-details: always
      show-components: always
    info:
      enabled: true
  health:
    defaults:
      enabled: true
    diskspace:
      enabled: true
      # Presumably the minimum free disk space before the disk check reports DOWN (confirm).
      threshold: 100MB
  metrics:
    export:
      prometheus:
        # NOTE(review): this property path looks like the Spring Boot 2.x form; confirm it
        # matches the Spring Boot version in use.
        enabled: true
    distribution:
      # Publish histogram buckets for http.server.requests; the Grafana panel in section 7.1
      # queries http_server_requests_seconds_bucket.
      percentiles-histogram:
        http.server.requests: true
      # Percentiles requested for the same meter: median, 95th and 99th.
      percentiles:
        http.server.requests: 0.5, 0.95, 0.99

2.2 健康检查

package com.example.demo.health;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.stereotype.Component;
import com.example.demo.repository.UserRepository;

/**
 * Health indicator that reports whether the database can be queried through
 * {@link UserRepository}.
 *
 * <p>The check issues {@code userRepository.count()}; on success the result is exposed as the
 * {@code userCount} detail, on any failure the indicator reports DOWN.
 */
@Component
public class DatabaseHealthIndicator implements HealthIndicator {

    @Autowired
    private UserRepository userRepository;

    /**
     * Runs the probe query and translates the outcome into a {@link Health} value.
     *
     * @return UP with {@code database=Available} and {@code userCount}, or DOWN with
     *         {@code database=Unavailable} and an {@code error} detail describing the failure
     */
    @Override
    public Health health() {
        try {
            long userCount = userRepository.count();
            return Health.up()
                    .withDetail("database", "Available")
                    .withDetail("userCount", userCount)
                    .build();
        } catch (Exception e) {
            // Do not pass e.getMessage() to withDetail(): the message may be null and
            // withDetail() rejects null values, which would make the health check itself
            // throw. withException() records "<exception class>: <message>" under "error".
            return Health.down()
                    .withDetail("database", "Unavailable")
                    .withException(e)
                    .build();
        }
    }
}

2.3 自定义指标

package com.example.demo.metrics;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicLong;

@Component
public class CustomMetrics {
    
    private final Counter userCreatedCounter;
    private final Timer userProcessingTimer;
    private final AtomicLong activeUsers;
    
    @Autowired
    public CustomMetrics(MeterRegistry meterRegistry) {
        this.userCreatedCounter = Counter.builder("user.created")
                .description("Number of users created")
                .register(meterRegistry);
        
        this.userProcessingTimer = Timer.builder("user.processing.time")
                .description("Time taken to process user operations")
                .register(meterRegistry);
        
        this.activeUsers = meterRegistry.gauge("user.active", new AtomicLong(0));
    }
    
    public void incrementUserCreated() {
        userCreatedCounter.increment();
    }
    
    public void recordUserProcessingTime(Runnable operation) {
        userProcessingTimer.record(operation);
    }
    
    public void setActiveUsers(long count) {
        activeUsers.set(count);
    }
}

3. 日志监控

3.1 日志配置

<!-- logback-spring.xml -->
<configuration>
    <!-- Console output: one JSON object per log event. -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
            <providers>
                <timestamp/>
                <logLevel/>
                <loggerName/>
                <message/>
                <!-- Emits MDC entries (e.g. traceId, username set by LoggingService). -->
                <mdc/>
                <stackTrace/>
            </providers>
        </encoder>
    </appender>
    
    <!-- File output: same JSON layout, rolled by day and by size. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/application.log</file>
        <!-- SizeAndTimeBasedRollingPolicy is required here: the %i index token and
             <maxFileSize> are not supported by the plain TimeBasedRollingPolicy. -->
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>logs/application.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>100MB</maxFileSize>
            <maxHistory>30</maxHistory>
            <totalSizeCap>3GB</totalSizeCap>
        </rollingPolicy>
        <encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
            <providers>
                <timestamp/>
                <logLevel/>
                <loggerName/>
                <message/>
                <mdc/>
                <stackTrace/>
            </providers>
        </encoder>
    </appender>
    
    <root level="INFO">
        <appender-ref ref="STDOUT"/>
        <appender-ref ref="FILE"/>
    </root>
</configuration>

3.2 结构化日志

package com.example.demo.service;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.stereotype.Service;
import java.util.UUID;

/**
 * Demonstrates structured logging: every log line written while a user is processed carries
 * a {@code traceId} and the {@code username} in the MDC.
 */
@Service
public class LoggingService {

    private static final Logger logger = LoggerFactory.getLogger(LoggingService.class);

    /**
     * Processes one user while tagging all log output with a fresh trace id.
     *
     * <p>Failures are logged and not rethrown. If the thread is interrupted, the interrupt
     * flag is restored so callers further up the stack can still observe it.
     *
     * @param username name of the user being processed; also stored in the MDC
     */
    public void processUser(String username) {
        String traceId = UUID.randomUUID().toString();
        MDC.put("traceId", traceId);
        MDC.put("username", username);

        try {
            logger.info("开始处理用户: {}", username);

            // Simulated business processing.
            Thread.sleep(1000);

            logger.info("用户处理完成: {}", username);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the interrupt status; set it again so the
            // cancellation request is not lost.
            Thread.currentThread().interrupt();
            logger.error("用户处理失败: {}", username, e);
        } catch (Exception e) {
            logger.error("用户处理失败: {}", username, e);
        } finally {
            // Remove only the keys this method added; MDC.clear() would also wipe context
            // entries that callers (e.g. a request filter) placed in the MDC.
            MDC.remove("traceId");
            MDC.remove("username");
        }
    }
}

4. 性能监控

4.1 性能指标收集

package com.example.demo.metrics;

import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicLong;

/**
 * HTTP performance meters: a request timer plus gauges for active connections and the total
 * number of requests seen.
 *
 * <p>The read accessors at the bottom are the ones {@code MonitoringController} calls to
 * build its JSON snapshot.
 */
@Component
public class PerformanceMetrics {

    private final Timer requestTimer;
    private final AtomicLong activeConnections;
    private final AtomicLong totalRequests;

    /**
     * Registers the timer and both gauges on the supplied registry.
     *
     * @param meterRegistry registry that receives the meters
     */
    @Autowired
    public PerformanceMetrics(MeterRegistry meterRegistry) {
        // Deliberately NOT named "http.server.requests": that name is used by Spring Boot's
        // own tagged request timer, and the Prometheus registry requires all meters sharing a
        // name to have the same tag keys. An untagged meter under that name would conflict.
        this.requestTimer = Timer.builder("app.http.request.duration")
                .description("HTTP request processing time")
                .register(meterRegistry);

        this.activeConnections = meterRegistry.gauge("http.active.connections", new AtomicLong(0));
        this.totalRequests = meterRegistry.gauge("http.total.requests", new AtomicLong(0));
    }

    /**
     * Runs {@code operation} and records its duration on the request timer.
     *
     * @param operation work to execute and time
     */
    public void recordRequestTime(Runnable operation) {
        requestTimer.record(operation);
    }

    /** Marks one more request as in flight. */
    public void incrementActiveConnections() {
        activeConnections.incrementAndGet();
    }

    /** Marks one in-flight request as finished. */
    public void decrementActiveConnections() {
        activeConnections.decrementAndGet();
    }

    /** Adds one to the total request count. */
    public void incrementTotalRequests() {
        totalRequests.incrementAndGet();
    }

    /**
     * @return number of requests currently in flight
     */
    public long getActiveConnections() {
        return activeConnections.get();
    }

    /**
     * @return number of requests counted since this bean was created
     */
    public long getTotalRequests() {
        return totalRequests.get();
    }

    /**
     * @return mean duration, in milliseconds, of the operations recorded through
     *         {@link #recordRequestTime(Runnable)}; {@code 0} when nothing has been recorded
     */
    public double getAverageResponseTime() {
        // Fully qualified to avoid adding an import to the file.
        return requestTimer.mean(java.util.concurrent.TimeUnit.MILLISECONDS);
    }
}

4.2 性能监控拦截器

package com.example.demo.interceptor;

import com.example.demo.metrics.PerformanceMetrics;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.servlet.HandlerInterceptor;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

/**
 * MVC interceptor that feeds {@link PerformanceMetrics}: each request bumps the active and
 * total counters on entry and releases the active slot on completion.
 *
 * <p>NOTE(review): an interceptor normally has to be registered with Spring MVC (for example
 * via {@code WebMvcConfigurer#addInterceptors}); no such registration is visible here.
 */
@Component
public class PerformanceInterceptor implements HandlerInterceptor {

    @Autowired
    private PerformanceMetrics performanceMetrics;

    /**
     * Counts the incoming request and always lets it proceed.
     *
     * @return {@code true} unconditionally
     */
    @Override
    public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) {
        final PerformanceMetrics metrics = this.performanceMetrics;
        metrics.incrementActiveConnections();
        metrics.incrementTotalRequests();
        return true;
    }

    /**
     * Releases the active-connection slot taken in {@code preHandle}, whether or not the
     * request ended with an exception.
     */
    @Override
    public void afterCompletion(HttpServletRequest request, HttpServletResponse response, Object handler, Exception ex) {
        this.performanceMetrics.decrementActiveConnections();
    }
}

5. 业务监控

5.1 业务指标收集

package com.example.demo.metrics;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Business-level meters: login, logout and order counters plus an online-user gauge.
 *
 * <p>The read accessors at the bottom are the ones {@code MonitoringController} calls to
 * build its JSON snapshot.
 */
@Component
public class BusinessMetrics {

    private final Counter userLoginCounter;
    private final Counter userLogoutCounter;
    private final Counter orderCreatedCounter;
    private final AtomicLong onlineUsers;

    /**
     * Registers the counters and the gauge on the supplied registry.
     *
     * @param meterRegistry registry that receives the meters
     */
    @Autowired
    public BusinessMetrics(MeterRegistry meterRegistry) {
        this.userLoginCounter = Counter.builder("business.user.login")
                .description("Number of user logins")
                .register(meterRegistry);

        this.userLogoutCounter = Counter.builder("business.user.logout")
                .description("Number of user logouts")
                .register(meterRegistry);

        this.orderCreatedCounter = Counter.builder("business.order.created")
                .description("Number of orders created")
                .register(meterRegistry);

        this.onlineUsers = meterRegistry.gauge("business.online.users", new AtomicLong(0));
    }

    /** Counts a login and raises the online-user gauge by one. */
    public void recordUserLogin() {
        userLoginCounter.increment();
        onlineUsers.incrementAndGet();
    }

    /** Counts a logout and lowers the online-user gauge by one, never below zero. */
    public void recordUserLogout() {
        userLogoutCounter.increment();
        // A logout without a matching login (duplicate event, restart) must not push the
        // gauge negative.
        onlineUsers.updateAndGet(current -> current > 0 ? current - 1 : 0);
    }

    /** Counts one created order. */
    public void recordOrderCreated() {
        orderCreatedCounter.increment();
    }

    /**
     * @return current value of the online-user gauge
     */
    public long getOnlineUsers() {
        return onlineUsers.get();
    }

    /**
     * @return number of logins counted since this bean was created
     */
    public long getTotalLogins() {
        return (long) userLoginCounter.count();
    }

    /**
     * @return number of created orders counted since this bean was created
     */
    public long getTotalOrders() {
        return (long) orderCreatedCounter.count();
    }
}

5.2 业务监控服务

package com.example.demo.service;

import com.example.demo.metrics.BusinessMetrics;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Thin service facade over {@link BusinessMetrics}. Each method forwards one business event
 * to the metrics bean; the arguments are currently unused because the logging steps are
 * still placeholders.
 */
@Service
public class BusinessMonitorService {

    @Autowired
    private BusinessMetrics businessMetrics;

    /**
     * Records a user login.
     *
     * @param username user that logged in (not used yet)
     */
    public void recordUserLogin(String username) {
        this.businessMetrics.recordUserLogin();
        // Placeholder: write the user-login log entry here.
    }

    /**
     * Records a user logout.
     *
     * @param username user that logged out (not used yet)
     */
    public void recordUserLogout(String username) {
        this.businessMetrics.recordUserLogout();
        // Placeholder: write the user-logout log entry here.
    }

    /**
     * Records a newly created order.
     *
     * @param orderId identifier of the order (not used yet)
     */
    public void recordOrderCreated(Long orderId) {
        this.businessMetrics.recordOrderCreated();
        // Placeholder: write the order-created log entry here.
    }
}

6. 告警系统

6.1 告警配置

# application.yml
# Actuator settings for the alerting example.
management:
  endpoints:
    web:
      exposure:
        # NOTE(review): "alerts" is not defined by any code in this article (AlertHealthIndicator
        # in section 6.2 is a HealthIndicator and shows up under /actuator/health instead).
        # Presumably a custom endpoint with id "alerts" is expected to exist; confirm.
        include: health,metrics,alerts
  endpoint:
    alerts:
      enabled: true
  health:
    defaults:
      enabled: true
    diskspace:
      # Presumably the minimum free disk space before the disk check reports DOWN (confirm).
      threshold: 100MB
    db:
      enabled: true

6.2 告警处理器

package com.example.demo.alert;

import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicLong;

@Component
public class AlertHealthIndicator implements HealthIndicator {
    
    private final AtomicLong errorCount = new AtomicLong(0);
    private final AtomicLong warningCount = new AtomicLong(0);
    
    @Override
    public Health health() {
        long errors = errorCount.get();
        long warnings = warningCount.get();
        
        if (errors > 10) {
            return Health.down()
                    .withDetail("errors", errors)
                    .withDetail("warnings", warnings)
                    .withDetail("status", "CRITICAL")
                    .build();
        } else if (warnings > 5) {
            return Health.up()
                    .withDetail("errors", errors)
                    .withDetail("warnings", warnings)
                    .withDetail("status", "WARNING")
                    .build();
        } else {
            return Health.up()
                    .withDetail("errors", errors)
                    .withDetail("warnings", warnings)
                    .withDetail("status", "HEALTHY")
                    .build();
        }
    }
    
    public void incrementError() {
        errorCount.incrementAndGet();
    }
    
    public void incrementWarning() {
        warningCount.incrementAndGet();
    }
}

7. 监控面板

7.1 Grafana配置

{
  "dashboard": {
    "title": "Spring Boot Application Monitoring",
    "panels": [
      {
        "title": "HTTP Requests",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(http_server_requests_seconds_count[5m])",
            "legendFormat": "Requests/sec"
          }
        ]
      },
      {
        "title": "Response Time",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket[5m])) by (le))",
            "legendFormat": "95th percentile"
          }
        ]
      },
      {
        "title": "Active Users",
        "type": "singlestat",
        "targets": [
          {
            "expr": "business_online_users",
            "legendFormat": "Online Users"
          }
        ]
      }
    ]
  }
}

7.2 自定义监控端点

package com.example.demo.controller;

import com.example.demo.metrics.BusinessMetrics;
import com.example.demo.metrics.PerformanceMetrics;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.HashMap;
import java.util.Map;

/**
 * REST endpoint that returns a snapshot of selected business and performance figures as a
 * flat JSON object under {@code GET /api/monitoring/metrics}.
 */
@RestController
@RequestMapping("/api/monitoring")
public class MonitoringController {

    @Autowired
    private BusinessMetrics businessMetrics;

    @Autowired
    private PerformanceMetrics performanceMetrics;

    /**
     * Collects the current values from both metrics beans.
     *
     * @return map keyed by metric name ({@code onlineUsers}, {@code totalLogins},
     *         {@code totalOrders}, {@code activeConnections}, {@code totalRequests},
     *         {@code averageResponseTime})
     */
    @GetMapping("/metrics")
    public Map<String, Object> getMetrics() {
        final Map<String, Object> snapshot = new HashMap<>();
        putBusinessMetrics(snapshot);
        putPerformanceMetrics(snapshot);
        return snapshot;
    }

    /** Copies the business figures into {@code target}. */
    private void putBusinessMetrics(Map<String, Object> target) {
        target.put("onlineUsers", businessMetrics.getOnlineUsers());
        target.put("totalLogins", businessMetrics.getTotalLogins());
        target.put("totalOrders", businessMetrics.getTotalOrders());
    }

    /** Copies the performance figures into {@code target}. */
    private void putPerformanceMetrics(Map<String, Object> target) {
        target.put("activeConnections", performanceMetrics.getActiveConnections());
        target.put("totalRequests", performanceMetrics.getTotalRequests());
        target.put("averageResponseTime", performanceMetrics.getAverageResponseTime());
    }
}

8. 运维工具

8.1 健康检查端点

package com.example.demo.health;

import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.stereotype.Component;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.RuntimeMXBean;

@Component
public class SystemHealthIndicator implements HealthIndicator {
    
    @Override
    public Health health() {
        MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
        RuntimeMXBean runtimeBean = ManagementFactory.getRuntimeMXBean();
        
        long usedMemory = memoryBean.getHeapMemoryUsage().getUsed();
        long maxMemory = memoryBean.getHeapMemoryUsage().getMax();
        long uptime = runtimeBean.getUptime();
        
        double memoryUsage = (double) usedMemory / maxMemory;
        
        if (memoryUsage > 0.9) {
            return Health.down()
                    .withDetail("memoryUsage", String.format("%.2f%%", memoryUsage * 100))
                    .withDetail("uptime", uptime)
                    .withDetail("status", "CRITICAL")
                    .build();
        } else if (memoryUsage > 0.8) {
            return Health.up()
                    .withDetail("memoryUsage", String.format("%.2f%%", memoryUsage * 100))
                    .withDetail("uptime", uptime)
                    .withDetail("status", "WARNING")
                    .build();
        } else {
            return Health.up()
                    .withDetail("memoryUsage", String.format("%.2f%%", memoryUsage * 100))
                    .withDetail("uptime", uptime)
                    .withDetail("status", "HEALTHY")
                    .build();
        }
    }
}

8.2 运维脚本

#!/bin/bash
# health-check.sh
#
# Prints the application's Actuator health, a few key metrics and the tail of the log file.
# Requires: curl, jq.
# Optional environment overrides:
#   APP_URL       base URL of the application   (default: http://localhost:8080)
#   LOG_FILE      log file to tail              (default: logs/application.log)
#   CURL_TIMEOUT  per-request timeout, seconds  (default: 5)

APP_URL="${APP_URL:-http://localhost:8080}"
HEALTH_ENDPOINT="$APP_URL/actuator/health"
METRICS_ENDPOINT="$APP_URL/actuator/metrics"
LOG_FILE="${LOG_FILE:-logs/application.log}"
CURL_TIMEOUT="${CURL_TIMEOUT:-5}"

# Fetch a URL and pretty-print its JSON body.
# - A timeout keeps the script from hanging on an unresponsive application.
# - curl's --fail is deliberately not used: a DOWN health response carries an HTTP error
#   status but still has a JSON body worth showing.
# - Connection failures and empty responses are reported instead of printing nothing.
fetch_json() {
    local url="$1"
    local body
    if body=$(curl -sS --max-time "$CURL_TIMEOUT" "$url"); then
        if [ -z "$body" ]; then
            echo "(empty response from $url)" >&2
        else
            # Fall back to the raw body when it is not valid JSON.
            printf '%s\n' "$body" | jq '.' 2>/dev/null || printf '%s\n' "$body"
        fi
    else
        echo "ERROR: request to $url failed" >&2
    fi
}

echo "=== Spring Boot Application Health Check ==="
echo "Timestamp: $(date)"
echo ""

# Check the application health status
echo "1. Application Health Status:"
fetch_json "$HEALTH_ENDPOINT"
echo ""

# Check key metrics
echo "2. Key Metrics:"
echo "HTTP Requests:"
fetch_json "$METRICS_ENDPOINT/http.server.requests"
echo ""

echo "JVM Memory:"
fetch_json "$METRICS_ENDPOINT/jvm.memory.used"
echo ""

# NOTE: the MeterFilter shown in section 9.2 denies meters starting with "jvm.threads";
# with that filter active this metric is presumably not available.
echo "Thread Count:"
fetch_json "$METRICS_ENDPOINT/jvm.threads.live"
echo ""

# Show recent log lines
echo "3. Recent Logs:"
if [ -r "$LOG_FILE" ]; then
    tail -n 20 "$LOG_FILE"
else
    echo "Log file not found or not readable: $LOG_FILE" >&2
fi
echo ""

echo "=== Health Check Complete ==="

9. 监控最佳实践

9.1 监控配置

# application.yml
# Recommended Actuator settings: a restricted endpoint list and health details only for
# authorized callers.
management:
  endpoints:
    web:
      exposure:
        # Only these four endpoints are reachable over HTTP (compare include: "*" in section 2.1).
        include: health,info,metrics,prometheus
      base-path: /actuator
  endpoint:
    health:
      # Unlike "always" in section 2.1, details are presumably limited to authorized
      # callers (confirm how authorization is configured for the application).
      show-details: when-authorized
      show-components: always
    info:
      enabled: true
  health:
    defaults:
      enabled: true
    diskspace:
      # Presumably the minimum free disk space before the disk check reports DOWN (confirm).
      threshold: 100MB
    db:
      enabled: true
  metrics:
    export:
      prometheus:
        # NOTE(review): this property path looks like the Spring Boot 2.x form; confirm it
        # matches the Spring Boot version in use.
        enabled: true
    distribution:
      # Histogram buckets feed the histogram_quantile query of the Grafana panel in section 7.1.
      percentiles-histogram:
        http.server.requests: true
      percentiles:
        http.server.requests: 0.5, 0.95, 0.99
  # Contributors for the info endpoint.
  # NOTE(review): "env" presumably publishes info.* properties; make sure none hold secrets.
  info:
    env:
      enabled: true
    java:
      enabled: true
    os:
      enabled: true

9.2 监控指标配置

package com.example.demo.config;

import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.config.MeterFilter;
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Meter registry configuration: tags every meter with the application name and filters out
 * meters whose name starts with {@code jvm.threads}.
 */
@Configuration
public class MonitoringConfig {

    /**
     * Adds the common tag {@code application=spring-boot-app} to the registry.
     *
     * @return customizer applied to the {@link MeterRegistry}
     */
    @Bean
    public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
        return registry -> {
            registry.config().commonTags("application", "spring-boot-app");
        };
    }

    /**
     * Denies meters whose name starts with {@code jvm.threads}.
     *
     * @return the deny filter
     */
    @Bean
    public MeterFilter meterFilter() {
        final MeterFilter denyThreadMeters = MeterFilter.denyNameStartsWith("jvm.threads");
        return denyThreadMeters;
    }
}

10. 总结

Spring Boot生态(Actuator、Micrometer、Prometheus、Grafana等)为应用的监控与运维提供了完整的解决方案:

  1. Actuator监控:内置监控端点和健康检查
  2. 指标收集:Micrometer指标收集和Prometheus集成
  3. 日志监控:结构化日志和日志分析
  4. 性能监控:性能指标收集和分析
  5. 业务监控:业务指标和用户行为监控
  6. 告警系统:异常检测和告警通知
  7. 监控面板:Grafana可视化监控
  8. 运维工具:健康检查和运维脚本

通过完善的监控体系,可以确保Spring Boot应用的稳定运行和高效运维。


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

程序员小凯

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值