一.安装prometheus
1. docker-compose.yml(prometheus)
version: "3"
services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    volumes:
      - ./prometheus:/etc/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--web.enable-admin-api'
      # lifecycle 开启后才能通过 POST /-/reload 热加载配置(下文会用到)
      - '--web.enable-lifecycle'
    ports:
      - "9090:9090"
    networks:
      - monitor
  cadvisor:
    # NOTE(review): google/cadvisor 镜像已停止更新,可考虑 gcr.io/cadvisor/cadvisor —— 待确认
    image: google/cadvisor:latest
    container_name: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /dev/disk/:/dev/disk:ro
      - /var/run:/var/run:rw
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8888:8080"
    networks:
      - monitor
  grafana:
    image: grafana/grafana
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
    networks:
      - monitor
  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    restart: always
    hostname: alertmanager
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - monitor
  node-exporter:
    image: prom/node-exporter:v1.2.2
    container_name: node-exporter
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker|var/lib/kubelet|var/lib/rancher|run|var/run|var/lib/containers|var/log/containers|var/log/pods|var/lib/kubelet/plugins|var/lib/dockershim)($$|/)'
    ports:
      - "9100:9100"
    restart: always
    # 原文件缺少 networks,补上以与其他服务保持在同一网络
    networks:
      - monitor
networks:
  monitor:
    driver: bridge
二. 部署业务
1. docker-compose.yml(project)
version: '3'
services:
  nginx:
    image: nginx:1.21
    container_name: nginx
    ports:
      - "80:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      - ./nginx/html:/usr/share/nginx/html:ro
    networks:
      - mynetwork
  redis:
    image: redis:5
    container_name: redis
    volumes:
      - ./redis/data:/data
    # 每一项作为一个 argv 传给 redis-server,最终生成配置行
    # "requirepass password" 等;password 为示例口令,部署时请替换
    command:
      - "--requirepass password"
      - "--bind 0.0.0.0"
      - "--maxmemory 512mb"
    ports:
      - "6379:6379"
    networks:
      - mynetwork
  mongo:
    image: mongo:4.2.5
    container_name: mongo
    volumes:
      - ./mongo/db:/data/db
    ports:
      - "27017:27017"
    command: --auth
    env_file:
      - .env
    environment:
      - MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}
      - MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}
    networks:
      - mynetwork
  rabbitmq:
    image: rabbitmq:3.7.15-management
    container_name: rabbitmq
    ports:
      - "5672:5672"
      - "15672:15672"
    volumes:
      - ./rabbitmq/data:/var/lib/rabbitmq
      - ./rabbitmq/logs:/var/log/rabbitmq
    env_file:
      - .env
    environment:
      - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER}
      - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS}
    networks:
      - mynetwork
networks:
  mynetwork:
三. 部署exporter
1.docker-compose.yml(exporter)
version: "3"
services:
  node-exporter:
    image: prom/node-exporter:v1.2.2
    container_name: node-exporter
    restart: always
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker|var/lib/kubelet|var/lib/rancher|run|var/run|var/lib/containers|var/log/containers|var/log/pods|var/lib/kubelet/plugins|var/lib/dockershim)($$|/)'
    ports:
      - "9100:9100"
  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:0.10.0
    container_name: nginx_exporter
    restart: always
    # 需要 nginx 开启 stub_status(location /stub_status)
    command:
      - '-nginx.scrape-uri=http://192.168.12.13/stub_status'
    ports:
      - "9113:9113"
  redis-exporter:
    image: oliver006/redis_exporter
    container_name: redis_exporter
    restart: always
    ports:
      - "9121:9121"
    environment:
      REDIS_ADDR: "192.168.12.13:6379"
      # 与 redis 服务的 requirepass 保持一致
      REDIS_PASSWORD: "password"
  mongodb-exporter:
    image: bitnami/mongodb-exporter:latest
    container_name: mongodb-exporter
    restart: always
    ports:
      - "9216:9216"
    env_file:
      - "/data/docker/.env"
    environment:
      - MONGODB_URI=${MONGODB_URI}
    command:
      - '--collect-all'       # 开启所有收集器
      - '--compatible-mode'   # 兼容旧版本指标
  rabbitmq-exporter:
    image: kbudde/rabbitmq-exporter
    container_name: rabbitmq-exporter
    restart: always
    environment:
      - RABBIT_URL=http://192.168.12.13:15672
      - RABBIT_USER=admin
      # 口令通过 secret 文件注入,避免明文出现在 compose 文件里
      - RABBIT_PASSWORD_FILE=/run/secrets/rabbitmq_password
    secrets:
      - rabbitmq_password
    ports:
      - "9419:9419"
  process-exporter:
    image: ncabatoff/process-exporter
    container_name: process-exporter
    restart: always
    ports:
      - "9256:9256"
    volumes:
      - /proc:/host/proc:ro
      - ./config:/config
    command:
      - '-procfs=/host/proc'
      - '-config.path=/config/process.yml'
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.19.0
    container_name: blackbox-exporter
    restart: always
    volumes:
      - ./blackbox:/config
    ports:
      - "9115:9115"
    command:
      - '--config.file=/config/blackbox.yml'
  domain-exporter:
    image: caarlos0/domain_exporter
    container_name: domain-exporter
    restart: always
    ports:
      - "9222:9222"
secrets:
  rabbitmq_password:
    file: ./rabbitmq_password.txt
四.创建触发器
1. domain
mkdir -p prometheus/rules
# here-doc 必须用 "<<"("<EOF" 会被当成从名为 EOF 的文件重定向输入);
# 分隔符加引号('EOF')禁止 shell 变量展开,否则 {{ $labels.xxx }} 中的
# $labels 会被展开成空串
cat > prometheus/rules/domain.yml <<'EOF'
groups:
  - name: domain_alert_rules
    rules:
      - alert: 域名检测失败
        expr: domain_probe_success == 0
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}"
          description: "{{ $labels.domain }} 域名检测失败"
      # 原文两条告警同名"域名过期",改为不同名称便于区分与静默
      - alert: 域名30天内过期
        expr: domain_expiry_days < 30
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}"
          description: "{{ $labels.domain }} 域名30天后过期"
      - alert: 域名5天内过期
        expr: domain_expiry_days < 5
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}"
          description: "{{ $labels.domain }} 域名5天后过期"
EOF
2. mysql
# "<<" 修正 here-doc 语法;'EOF' 加引号防止 shell 展开 {{ $value }}
cat > prometheus/rules/mysql.yml <<'EOF'
groups:
  # 原组名 "CPU Usage" 与组内容(连接数、慢查询、复制延迟等)不符,统一命名
  - name: mysql_alert_rules
    rules:
      - alert: HighCPUUsage
        # TODO(review): mysqld_exporter 默认不导出 mysql_global_status_cpu_time_seconds_total,
        # 上线前用 /metrics 核对该指标是否存在
        expr: mysql_global_status_cpu_time_seconds_total / mysql_global_status_uptime_seconds * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High CPU Usage on MySQL
          description: "CPU usage on MySQL is high. Current value: {{ $value }}%"
      - alert: HighConnectionCount
        expr: mysql_global_status_threads_connected > 200
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: High Connection Count on MySQL
          description: "Number of connections to MySQL is high. Current value: {{ $value }}"
      - alert: HighSlowQueries
        # slow_queries 是累计计数器,这里比较的是启动以来的总量;
        # 如需"速率"语义可改用 rate(...[5m])
        expr: mysql_global_status_slow_queries > 100
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: High Slow Queries on MySQL
          description: "Number of slow queries on MySQL is high. Current value: {{ $value }}"
      - alert: HighReplicationDelay
        expr: mysql_slave_status_seconds_behind_master > 300
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: High Replication Delay on MySQL Slave
          description: "Replication delay on MySQL slave is high. Current value: {{ $value }} seconds"
      # 原名 HighDiskUsage 名不副实:表达式监控的是已用连接数占 max_connections 的比例
      - alert: HighConnectionUsage
        # TODO(review): max_connections 通常在 mysql_global_variables_max_connections 中,核对指标名
        expr: mysql_global_status_max_used_connections / mysql_global_status_max_connections * 100 > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Connection Usage on MySQL
          description: "MySQL is reaching its max connections limit. Current value: {{ $value }}%"
EOF
3. nginx
# "<<" 修正 here-doc 语法;'EOF' 加引号防止 shell 展开模板变量
cat > prometheus/rules/nginx.yml <<'EOF'
# NOTE(review): 本文使用的 nginx-prometheus-exporter(stub_status 模式)只导出
# 连接数与请求总量等少量指标;下方带 TODO 的表达式引用的指标需要 NGINX Plus
# 或其他 exporter 才有,上线前逐条用 /metrics 核对
groups:
  - name: nginx_alert_rules
    rules:
      - alert: NginxHighRequestRate
        expr: rate(nginx_http_requests_total[5m]) > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High NGINX Request Rate"
          description: "The NGINX request rate has exceeded the threshold of 100 requests per second."
      - alert: NginxHighErrorRate
        # TODO(review): stub_status 导出的 nginx_http_requests_total 没有 status 标签
        expr: rate(nginx_http_requests_total{status=~"5.*"}[1m]) > 10
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "High NGINX Error Rate"
          description: "The NGINX error rate has exceeded the threshold of 10%."
      - alert: NginxSlowResponse
        # TODO(review): nginx_upstream_response_time 需要 NGINX Plus,核对是否可用
        expr: nginx_upstream_response_time{instance="nginx-exporter"} > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Slow NGINX Response"
          description: "NGINX is experiencing slow response times (>1s)."
      - alert: NginxHighCPUUsage
        # TODO(review): nginx_worker_cpu_usage 不是 nginx-prometheus-exporter 的指标,核对来源
        expr: nginx_worker_cpu_usage > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High NGINX CPU Usage"
          description: "NGINX worker processes are experiencing high CPU usage (>80%)."
      - alert: NginxHighMemoryUsage
        # TODO(review): nginx_worker_memory_usage 同上,核对来源
        expr: nginx_worker_memory_usage > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High NGINX Memory Usage"
          description: "NGINX worker processes are experiencing high memory usage (>90%)."
EOF
4. node_exporter
# "<<" 修正 here-doc 语法;'EOF' 加引号防止 shell 展开模板变量
cat > prometheus/rules/node.yml <<'EOF'
groups:
  - name: Node Exporter Alerts
    rules:
      - alert: HighCPUUsage
        expr: 100 * (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High CPU Usage detected
          description: CPU usage is above 90% for 5 minutes.
      - alert: HighMemoryUsage
        expr: 100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Memory Usage detected
          description: Memory usage is above 90% for 5 minutes.
      - alert: HighDiskUsage
        # 可用空间低于 10% 即使用率高于 90%
        expr: 100 * (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Disk Usage detected
          description: Disk usage on the root partition is above 90% for 5 minutes.
      - alert: HighNetworkTraffic
        # 1e+8 字节/秒 = 100 MB/s(约 800 Mbps);原描述写成 100 Mbps,与阈值不符,已修正
        expr: sum by(instance) (rate(node_network_receive_bytes_total[5m])) > 1e+8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Network Traffic detected
          description: Network receive traffic is above 100 MB/s for 5 minutes.
      - alert: HighSystemLoad
        expr: node_load1 > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High System Load detected
          description: System load average is above 2 for 5 minutes.
      - alert: HighNetworkConnections
        # node_exporter 中已建立 TCP 连接数的指标是 node_netstat_Tcp_CurrEstab,
        # 且没有 state 标签;原表达式 node_netstat_Tcp_established{state="ESTABLISHED"} 永远无数据
        expr: node_netstat_Tcp_CurrEstab > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Network Connections detected
          description: Number of established TCP connections is above 100 for 5 minutes.
      - alert: HighFileHandlesUsage
        # 原表达式写死 instance="localhost",与本文中 instance 标签
        # (prometheus-server/test-server)不匹配,导致永不告警;去掉过滤对所有实例生效
        expr: 100 * (node_filefd_allocated / node_filefd_maximum) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High File Handles Usage detected
          description: File handles usage is above 80% for 5 minutes.
      - alert: SystemTimeDesync
        expr: abs(node_time_seconds - time()) > 60
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: System Time Desynchronization detected
          description: System time is desynchronized by more than 60 seconds for 5 minutes.
      - alert: LowDiskSpace
        # NOTE(review): 该条件与上面的 HighDiskUsage 完全等价,仅 severity 不同;可考虑合并
        expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Low Disk Space detected
          description: Available disk space is below 10% for 5 minutes on /.
      - alert: HighDiskIOWaitTime
        # TODO(review): node_disk_io_time_weighted_average 不是 node_exporter 标准指标,
        # 标准指标为 node_disk_io_time_weighted_seconds_total(计数器,需配合 rate());核对后修正
        expr: node_disk_io_time_weighted_average > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High Disk IO Wait Time detected
          description: Disk IO wait time is above 10 milliseconds for 5 minutes.
EOF
5. rabbitmq
# "<<" 修正 here-doc 语法;'EOF' 加引号防止 shell 展开模板变量
cat > prometheus/rules/rabbitmq.yml <<'EOF'
# NOTE(review): kbudde/rabbitmq-exporter 的指标名随版本变化(例如内存可能是
# rabbitmq_node_mem_used),上线前用 /metrics 逐条核对以下指标名
groups:
  - name: rabbitmq_alert_rules
    rules:
      - alert: RabbitMQHighMemoryUsage
        # 90e6 字节 ≈ 90MB
        expr: rabbitmq_node_memory_used > 90e6
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High RabbitMQ Memory Usage"
          description: "RabbitMQ node memory usage has exceeded 90MB."
      - alert: RabbitMQHighConnections
        expr: rabbitmq_connections > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High RabbitMQ Connections"
          description: "RabbitMQ has more than 100 connections."
      - alert: RabbitMQHighQueues
        expr: rabbitmq_queues > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High RabbitMQ Queues"
          description: "RabbitMQ has more than 10 queues."
      - alert: RabbitMQUnacknowledgedMessages
        expr: rabbitmq_messages_unacknowledged > 1000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Unacknowledged RabbitMQ Messages"
          description: "RabbitMQ has more than 1000 unacknowledged messages."
      - alert: RabbitMQConsumersNotAvailable
        expr: rabbitmq_consumers_avail_ratio < 0.9
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Unavailable RabbitMQ Consumers"
          description: "RabbitMQ consumers are not available for more than 10% of the time."
      - alert: RabbitMQNodeDown
        expr: rabbitmq_up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "RabbitMQ Node Down"
          description: "RabbitMQ node is down."
EOF
6. redis
# "<<" 修正 here-doc 语法;'EOF' 加引号防止 shell 展开模板变量
cat > prometheus/rules/redis.yml <<'EOF'
groups:
  - name: redis_alert_rules
    rules:
      - alert: RedisHighMemoryUsage
        # 90e6 字节 ≈ 90MB
        expr: redis_memory_used_bytes > 90e6
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Redis Memory Usage"
          description: "Redis memory usage has exceeded 90MB."
      - alert: RedisHighCPUUsage
        expr: sum(rate(redis_cpu_sys_seconds_total[1m])) > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Redis CPU Usage"
          description: "Redis CPU usage has exceeded 80%."
      - alert: RedisBlockedClients
        expr: redis_blocked_clients > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of Blocked Redis Clients"
          description: "Redis has more than 10 blocked clients."
      - alert: RedisReplicationLag
        expr: redis_slave_repl_offset - redis_master_repl_offset > 10000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis Replication Lag"
          description: "Redis replication lag is greater than 10,000."
      - alert: RedisEvictedKeys
        # NOTE(review): 告警名与表达式不符 —— 表达式监控的是"只有 miss 没有 hit",
        # 与 key 驱逐无关;如要监控驱逐,可用 rate(redis_evicted_keys_total[5m]) > 0。
        # 另外 redis_exporter 的 hit/miss 指标名通常带 _total 后缀 —— 待确认
        expr: redis_keyspace_hits == 0 and redis_keyspace_misses > 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Redis Evicted Keys"
          description: "Redis is evicting keys without serving any hits."
      - alert: RedisClusterNodesDown
        expr: redis_up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Redis Cluster Nodes Down"
          description: "One or more Redis cluster nodes are down."
EOF
五. prometheus config
1. 静态方式
# 路径改为相对的 prometheus/(与 docker-compose 中挂载的 ./prometheus 一致,
# 原文写成了根目录 /prometheus);"<<" 修正 here-doc 语法,'EOF' 禁止变量展开
cat > prometheus/prometheus.yml <<'EOF'
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "cadvisor"
    # 覆盖全局默认值,每15秒从该作业刮取一次目标
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.12:8888"]
  - job_name: "node-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.12:9100"]
        labels:
          instance: "prometheus-server"
      - targets: ["192.168.12.13:9100"]
        labels:
          instance: "test-server"
  - job_name: "nginx-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.13:9113"]
        labels:
          instance: "test-server"
      - targets: ["192.168.12.13:8000"]
    # 注意: relabel_configs 作用于该 job 的全部 target(包括 9113)
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
        replacement: "test-server"
  - job_name: "redis-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.13:9121"]
        labels:
          instance: "test-server"
  - job_name: "rabbitmq-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.13:9419"]
        labels:
          instance: "test-server"
  - job_name: "mysqld-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.12:9104"]
        labels:
          instance: "prom-server"
  - job_name: "process-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.13:9256"]
        labels:
          instance: "test-server"
  - job_name: "mongodb-exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["192.168.12.13:9216"]
        labels:
          instance: "test-server"
  - job_name: "blackbox-tcp"
    metrics_path: /probe
    params:
      module: [tcp_connect]
    static_configs:
      - targets:
          - 192.168.12.12:22
          - 192.168.12.13:22
          - 192.168.12.12:9090
          # 待确认: mongodb-exporter(9216)部署在 .13,此处 .12 疑为笔误
          - 192.168.12.12:9216
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      # 真正发请求的是 blackbox-exporter,原目标地址作为 probe 参数传入
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "blackbox-icmp"
    metrics_path: /probe
    params:
      module: [icmp]
    static_configs:
      - targets:
          - 192.168.12.12
          - 192.168.12.13
          - 192.168.12.14
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "blackbox-http"
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - http://prometheus.io
          - https://www.baidu.com
          - https://www.jd.com
          - http://192.168.12.13
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "domain"
    metrics_path: /probe
    static_configs:
      - targets:
          - baidu.com
          - jd.com
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9222
EOF
web重载prometheus(静态方式下每变更一次需要重载)
curl -X POST http://localhost:9090/-/reload
2. 文件服务发现
- 创建targets文件夹
mkdir -p prometheus/targets
# "<<" 修正 here-doc 语法
cat > prometheus/targets/target.yml <<'EOF'
# NOTE(review): 下文 file_sd 配置中每个 job 引用的是各自的 targets/*.yml
# (cadvisor.yml、node.yml 等),实际使用时应把本文件按 job 拆分 —— 待确认
- targets:
    - "192.168.12.12:8888"
  # labels 必须是映射(key: value);原文写成了列表项("- env: test"),
  # 会导致 file_sd 解析失败
  labels:
    env: test
    instance: test-server
- targets:
    - baidu.com
    - jd.com
    - qq.com
- targets:
    # 原文为 92.168.12.13,缺少开头的 1
    - "192.168.12.13:9216"
  labels:
    instance: "test-server"
- targets:
    - "192.168.12.12:9104"
  labels:
    instance: "prom-server"
- targets:
    - "192.168.12.13:9113"
  labels:
    instance: "test-server"
- targets:
    - "192.168.12.12:9100"
  labels:
    instance: "prometheus-server"
- targets:
    - "192.168.12.13:9100"
  labels:
    instance: "test-server"
- targets:
    - "192.168.12.13:9419"
  labels:
    instance: "test-server"
- targets:
    - "192.168.12.13:9121"
  labels:
    instance: "test-server"
EOF
- 创建config文件
# "<<" 修正 here-doc 语法,'EOF' 加引号禁止变量展开
cat > prometheus/prometheus.yml <<'EOF'
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*.yml"
  # - "second_rules.yml"

# NOTE(review): 每个 job 引用独立的 targets/*.yml;与上一步创建的单一
# target.yml 不一致,需按 job 拆分 target 文件 —— 待确认
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "file-sd-cadvisor"
    # 覆盖全局默认值,每15秒从该作业刮取一次目标
    file_sd_configs:
      - files:
          # 原文为 targets/casvisor.yml,拼写错误
          - targets/cadvisor.yml
        refresh_interval: 10s
  - job_name: "file-node-exporter"
    file_sd_configs:
      - files:
          - targets/node.yml
        refresh_interval: 10s
  - job_name: "file-nginx-exporter"
    file_sd_configs:
      - files:
          - targets/nginx.yml
        refresh_interval: 10s
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
        replacement: "test-server"
  - job_name: "file-redis-exporter"
    file_sd_configs:
      - files:
          - targets/redis.yml
        refresh_interval: 10s
  - job_name: "file-rabbitmq-exporter"
    file_sd_configs:
      - files:
          - targets/rabbitmq.yml
        refresh_interval: 10s
  - job_name: "file-mysqld-exporter"
    file_sd_configs:
      - files:
          - targets/mysql.yml
        refresh_interval: 10s
  - job_name: "file-process-exporter"
    file_sd_configs:
      - files:
          - targets/process.yml
        refresh_interval: 10s
  - job_name: "file-mongodb-exporter"
    file_sd_configs:
      - files:
          - targets/mongodb.yml
        refresh_interval: 10s
  - job_name: "file-blackbox-tcp"
    metrics_path: /probe
    params:
      module: [tcp_connect]
    file_sd_configs:
      - files:
          - targets/blackbox-tcp.yml
        refresh_interval: 10s
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      # 真正发请求的是 blackbox-exporter,原目标地址作为 probe 参数传入
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "file-blackbox-icmp"
    metrics_path: /probe
    params:
      module: [icmp]
    file_sd_configs:
      - files:
          - targets/blackbox-icmp.yml
        refresh_interval: 10s
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "file-blackbox-http"
    metrics_path: /probe
    params:
      module: [http_2xx]
    file_sd_configs:
      - files:
          - targets/blackbox-http.yml
        refresh_interval: 10s
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9115
  - job_name: "file-domain"
    metrics_path: /probe
    file_sd_configs:
      - files:
          - targets/domain.yml
        refresh_interval: 10s
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.12.13:9222
EOF
重载一次,之后增删target无需重启prom
curl -X POST http://localhost:9090/-/reload
六. relabeling机制
1. 机制
在 Prometheus 的配置文件中,relabeling 是通过 relabel_configs 字段进行配置的。
relabel_configs 是一个列表,其中每个元素都定义了一个 relabeling 规则。下面是一些常见的 relabeling 配置参数
2. 参数
source_labels: 表示要匹配的源标签列表,用于确定要应用规则的标签。
target_label: 表示要写入的目标标签名称。
replacement: 表示替换目标标签中匹配到的源标签值的字符串或正则表达式。
regex: 用于与源标签值进行匹配的正则表达式(不是布尔值),未指定时默认为 (.*)。
action: 表示要执行的操作类型,可以是 replace(替换)、keep(保留)、drop(移除)或 labelmap(标签映射)。
separator: 表示多个值之间的分隔符,用于将多个源标签的值合并为一个目标标签的值。
注意:Prometheus 的 relabel 配置中并没有 regex_flags 参数;如需忽略大小写等匹配行为,可在 regex 中使用内联标志,例如 (?i)pattern。
3. 案例
scrape_configs:
- job_name: 'example_job'
static_configs:
- targets: ['example-target:9090']
metric_relabel_configs:
- source_labels: [instance]
target_label: instance_rewrite
replacement: 'new_instance_name'
action: replace
在上面的示例中,我们定义了一个名为 example_job 的作业,并指定了一个静态目标 example-target:9090。然后,我们使用 relabeling 对抓取的目标进行重新配置。我们将源标签 instance 的值替换为 'new_instance_name',并将结果写入目标标签 instance_rewrite 中,使用操作类型 replace 来执行替换操作。
通过适当配置 relabel_configs,您可以根据不同的需求对目标进行灵活的修改和过滤。这使得 Prometheus 可以根据特定的条件选择要抓取的目标,并对抓取的数据进行必要的处理和转换。
请注意,上述示例通过 metric_relabel_configs 配置了 relabeling,这适用于修改指标标签。如果您想要修改目标标签,可以使用 relabel_configs 配置。
七. 案例
1. 添加或替换目标标签
- source_labels: [source_label]
target_label: target_label
replacement: replacement_value
action: replace
这个案例中,源标签 source_label 的值经过替换为 replacement_value,并写入到目标标签 target_label 中。action 参数设置为 replace。
2. 删除目标标签
- source_labels: [source_label]
action: drop
在这个案例中,当 source_label 的值匹配 regex(未显式指定时默认 (.*),即总是匹配)时,整个抓取目标(target)会被丢弃,不再抓取。注意 action: drop 丢弃的是目标而不是标签;如需删除标签,应使用 action: labeldrop。
3. 使用正则表达式匹配和替换
- source_labels: [source_label]
target_label: target_label
regex: (pattern)
replacement: replacement_value
action: replace
在这个案例中,source_label 的值将使用正则表达式 (pattern) 进行匹配。如果匹配成功,将以 replacement_value 替换,并写入到目标标签 target_label 中。
4. 根据表达式保留目标标签
- source_labels: [source_label]
regex: (pattern)
action: keep
在这个案例中,只有当 source_label 的值匹配正则表达式 (pattern) 时,该抓取目标(target)才会被保留,不匹配的目标会被整体丢弃;keep/drop 作用于目标而非标签。
5. 多个源标签合并为一个目标标签
- source_labels: [source_label1, source_label2]
target_label: target_label
separator: separator_value
action: replace
在这个案例中,多个源标签 source_label1 和 source_label2 的值将以 separator_value 分隔符进行合并,并写入到目标标签 target_label 中。
6. 根据标签值进行过滤
- source_labels: [source_label]
regex: ^value.*
action: keep
在这个案例中,只有当 source_label 的值以 “value” 开头时,该抓取目标才会被保留,其余目标会被丢弃。
7. 使用关键字匹配替换
- source_labels: [source_label]
target_label: target_label
replacement: replacement_value
action: replace
regex: (keyword1|keyword2)
在这个案例中,source_label 的值将与关键字 “keyword1” 和 “keyword2” 进行匹配。如果匹配成功,将以 replacement_value 替换,并写入到目标标签 target_label 中。
8. 多对多的关联替换
- source_labels: [source_label1, source_label2]
target_label: target_label
replacement: replacement_value
action: replace
separator: ","
在这个案例中,多个源标签 source_label1 和 source_label2 的值将以逗号 “,” 进行分隔,然后以 replacement_value 进行替换,并写入到目标标签 target_label 中。
9. 目标标签名的重命名
- source_labels: [source_label]
  target_label: new_target_label
  action: replace
在这个案例中,源标签 source_label 的值会被复制写入新的目标标签 new_target_label(原标签保留)。注意复制/重命名标签应使用 action: replace 而不是 keep——keep 只做目标过滤,不会写入标签;批量重命名标签可使用 action: labelmap。
- source_labels: [source_label1, source_label2]
target_label: target_label
separator: separator_value
action: replace
在这个案例中,多个源标签 source_label1 和 source_label2 的值将以 separator_value 分隔符进行合并,并写入到目标标签 target_label 中。
6. 根据标签值进行过滤
- source_labels: [source_label]
regex: ^value.*
action: keep
在这个案例中,只有当 source_label 的值以 “value” 开头时,目标标签将保留 (不删除)。
7. 使用关键字匹配替换
- source_labels: [source_label]
target_label: target_label
replacement: replacement_value
action: replace
regex: (keyword1|keyword2)
在这个案例中,source_label 的值将与关键字 “keyword1” 和 “keyword2” 进行匹配。如果匹配成功,将以 replacement_value 替换,并写入到目标标签 target_label 中。
8. 多对多的关联替换
- source_labels: [source_label1, source_label2]
target_label: target_label
replacement: replacement_value
action: replace
separator: ","
在这个案例中,多个源标签 source_label1 和 source_label2 的值将以逗号 “,” 进行分隔,然后以 replacement_value 进行替换,并写入到目标标签 target_label 中。
9. 目标标签名的重命名
- source_labels: [source_label]
target_label: new_target_label
action: keep
在这个案例中,源标签 source_label 的值将保留,同时将写入到目标标签 new_target_label 中。