1、创建监控程序目录
[root@bigdata3 opt]# mkdir /opt/monitor
2、导入软件
[root@bigdata3 monitor]# ll
总用量 321032
-rw-r--r--. 1 root root 23631797 7月 3 2019 alertmanager-0.17.0.linux-amd64.tar.gz
-rw-r--r--. 1 root root 127938445 7月 5 2019 go1.12.5.linux-amd64.tar.gz
-rw-r--r--. 1 root root 58512371 7月 22 2019 grafana-6.2.5.linux-amd64.tar.gz
-rw-r--r--. 1 root root 50120400 7月 16 2019 influxdb-1.7.7_linux_amd64.tar.gz
-rw-r--r--. 1 root root 48497454 7月 3 2019 prometheus-2.10.0.linux-amd64.tar.gz
-rw-r--r--. 1 root root 20021531 7月 16 2019 telegraf-1.11.1_linux_amd64.tar.gz
[root@bigdata3 monitor]# pwd
/opt/monitor
3、解压软件
[root@bigdata3 monitor]# tar xf prometheus-2.10.0.linux-amd64.tar.gz
[root@bigdata3 monitor]# tar xf alertmanager-0.17.0.linux-amd64.tar.gz
4、重命名文件夹
[root@bigdata3 monitor]# mv prometheus-2.10.0.linux-amd64 prometheus
5、修改配置文件
vi prometheus.yml
原始配置文件
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']
修改后配置文件
[root@bigdata3 prometheus]# cat prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['192.168.1.5:9093'] # alertmanagers所在地址
      # - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  #################rules#############################
  - "/opt/monitor/prometheus/rules/hosts/*.yml" # 告警规则存放目录
  #################rules#############################
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['192.168.1.5:9090'] #Prometheus安装机器地址
  #################hosts#############################
  - job_name: 'dmp_hosts' # 标签用于区分各个监控项目的机器
    file_sd_configs:
    - files: ['/opt/monitor/prometheus/monitor_config/dmp/*.yml'] # 监控dmp集群的机器配置放置目录
      refresh_interval: 5s
  #################hosts#############################
6、创建相关文件夹
创建/opt/monitor/prometheus/rules/用于存放告警规则。由于实际监控项目较多,按类别分开存放,因此在其下再创建子文件夹hosts(主机)、mysql、hdfs
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/rules
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/rules/hosts
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/rules/mysql
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/rules/hdfs
[root@bigdata3 prometheus]# ls /opt/monitor/prometheus/rules/
hdfs hosts mysql
进入到hosts目录创建告警规则文件
[root@bigdata3 hosts]# cat disk_use.yml
groups:
- name: host_disk
  rules:
  - alert: NodediskUsage
    expr: round(disk_used_percent{kind="jkj"}) > 50
    for: 1m
    labels:
      sort: host_disk
      level: severity
    annotations:
      summary: "{{$labels.instance}}: High disk usage"
      description: "disk {{$labels.path}} already use {{ $value }}%,please check it"
创建/opt/monitor/prometheus/monitor_config/用于存放分类后监控的主机,同样为了区分各个项目的机器创建项目子文件夹dmp、xl
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/monitor_config/
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/monitor_config/dmp
[root@bigdata3 prometheus]# mkdir /opt/monitor/prometheus/monitor_config/xl
[root@bigdata3 prometheus]# ls /opt/monitor/prometheus/monitor_config/
dmp xl
进入到dmp目录创建所要监控机器的文件
[root@bigdata3 dmp]# cat 192.168.1.5.yml
- targets: [ "192.168.1.5:9275" ]
  labels:
    group: "monitor-server"
7、启动Prometheus进程
/opt/monitor/prometheus/prometheus --config.file="/opt/monitor/prometheus/prometheus.yml" --web.enable-lifecycle > /opt/monitor/prometheus/prometheus.log 2>&1 &
启动时加上--web.enable-lifecycle启用远程热加载配置文件;修改配置后无需重启进程,执行 curl -X POST http://192.168.1.5:9090/-/reload 即可让Prometheus重新加载配置