一、部署文件内容
1、docker-compose-prometheus.yml文件
# Monitoring stack: Prometheus, Grafana, node-exporter and cAdvisor.
# Every service joins the same user-defined bridge network ("monitor").
version: '2'

networks:
  monitor:
    driver: bridge

services:
  prometheus:
    image: docker-0.unsee.tech/prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # The main config and the alert rule file are bind-mounted from the host.
      - /home/kekaoxing/Monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/kekaoxing/Monitoring/prometheus/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - "9090:9090"
    networks:
      - monitor

  grafana:
    image: docker-0.unsee.tech/grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - "3000:3000"
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - "9100:9100"
    networks:
      - monitor

  cadvisor:
    image: docker-0.unsee.tech/lagoudocker/cadvisor:v0.37.0
    # On ARM hosts, use this image instead of the one above:
    # image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/cadvisor/cadvisor:v0.51.0-linuxarm64
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /sys/fs/cgroup:/sys/fs/cgroup:ro
    ports:
      # cAdvisor listens on 8080 inside the container by default (no -port
      # override is set here), so host port 8088 must map to container 8080.
      - "8088:8080"
    networks:
      - monitor
2、node_down.yml
# Alert rules: InstanceDown fires for any scrape target whose `up` metric
# has been 0 for one minute.
groups:
  - name: node_down
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
3、prometheus.yml
# Prometheus server configuration: scrape the vLLM metrics endpoint and
# evaluate the rules in node_down.yml.
global:
  scrape_interval: 5s
  evaluation_interval: 30s

alerting:
  alertmanagers:
    # NOTE(review): the compose file in this guide defines no Alertmanager
    # service; confirm one is actually running at this address.
    - static_configs:
        - targets: ['10.6.81.21:9093']

rule_files:
  - "node_down.yml"

scrape_configs:
  - job_name: vllm
    static_configs:
      - targets:
          - '10.6.81.21:7814'
二、启动docker容器
1、使用docker-compose启动
# Start every service from the compose file in the background.
docker-compose --file docker-compose-prometheus.yml up --detach
三、访问
1、prometheus界面
浏览器访问http://10.6.81.21:9090/targets
四、配置grafana
1、浏览器访问http://10.6.81.21:3000/
默认账号和密码均为admin,首次登录后按提示将密码修改为123456(仅为示例,生产环境请使用强密码)
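2、配置prometheus数据源
在Grafana中添加Prometheus类型的数据源,URL填写http://10.6.81.21:9090(Grafana与Prometheus同在monitor网络,也可填写http://prometheus:9090)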

3、保存测试
其他选项保持默认,点击保存并测试,显示successfully即为成功

4、导入dashboard


1142

被折叠的 条评论
为什么被折叠?



