PS:部署前请确保被监控节点与prometheus服务器的时区和系统时间保持一致
1、创建/data/apps目录,并且下载 node_exporter-1.4.0.linux-amd64.tar.gz
# One command per line; run them in order.
mkdir -p /data/apps
cd /data/apps
wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz
或
wget https://githubfast.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz
2、解压压缩包到/usr/local,并且创建软链接
# Unpack into /usr/local, then expose a version-independent path via a symlink.
tar -zxvf node_exporter-1.4.0.linux-amd64.tar.gz -C /usr/local
cd /usr/local
ln -s node_exporter-1.4.0.linux-amd64 node_exporter
3、使用systemctl管理node_exporter
vi /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
User=root
ExecStart=/usr/local/node_exporter/node_exporter
# $MAINPID is substituted by systemd, not by a shell.
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
[Install]
WantedBy=multi-user.target
4、设置开机自启并且开启node_exporter服务,最后查看服务状态
systemctl enable node_exporter && systemctl start node_exporter && systemctl status node_exporter
# setenforce only changes the running SELinux mode; it does not survive a reboot.
setenforce 0
iptables放通tcp的9100端口
iptables -I INPUT -p tcp --dport 9100 -j ACCEPT
# iptables-save prints the live ruleset to stdout; it does not persist it by itself.
iptables-save
5、在prometheus服务器的prometheus.yml文件追加node节点监控
vi /usr/local/prometheus/prometheus.yml
  # Append under the existing scrape_configs: section.
  # NOTE(review): assumes scrape_configs entries are indented two spaces - match your file.
  - job_name: "node1"
    static_configs:
      - targets: ['被监控设备的IP:9100']
或者以file_sd_configs方式追加node
vi /usr/local/prometheus/prometheus.yml
  - job_name: "node1"
    file_sd_configs:
      - files:
          - /usr/local/prometheus/target/node1.yml
mkdir -p /usr/local/prometheus/target
cd /usr/local/prometheus/target
vi node1.yml
- targets:
    - "被监控设备的IP:9100"
6、检查prometheus.yml格式
# Validate the edited configuration before asking Prometheus to load it.
cd /usr/local/prometheus/
./promtool check config prometheus.yml
7、热加载prometheus配置(需要prometheus启动时带--web.enable-lifecycle参数,否则/-/reload接口不可用)
# Send an HTTP POST to the local Prometheus reload endpoint.
curl --request POST http://127.0.0.1:9090/-/reload
················································································ 脚本方式部署node-exporter ~node-exporter节点上(被监控节点) vi node-exporter.sh #!/bin/bash setenforce 0 && iptables -I INPUT -p tcp --dport 9100 -j ACCEPT && iptables-save && mkdir -p /data/apps > /dev/null && cd /data/apps && wget https://githubfast.com/prometheus/node_exporter/releases/download/v1.4.0/node_exporter-1.4.0.linux-amd64.tar.gz > /dev/null && echo "下载node-exporters压缩包完成" && tar -zxvf node_exporter-1.4.0.linux-amd64.tar.gz -C /usr/local > /dev/null && cd /usr/local && ln -s node_exporter-1.4.0.linux-amd64 node_exporter && echo "[Unit] Description=node_exporter Documentation=https://prometheus.io/ After=network.target [Service] Type=simple User=root ExecStart=/usr/local/node_exporter/node_exporter ExecReload=/bin/kill -HUP $MAINPID KillMode=process Restart=on-failure [Install] WantedBy=multi-user.target" | tee -i /usr/lib/systemd/system/node_exporter.service > /dev/null && systemctl enable node_exporter && systemctl start node_exporter && systemctl status node_exporter sh ./node-exporter.sh ········································································ 卸载node-exporter部署 vi node-exporter_uninstall.sh #!/bin/bash systemctl disable node_exporter && systemctl stop node_exporter && rm -rf /usr/lib/systemd/system/node_exporter.service && setenforce 1 && iptables -D INPUT -p tcp --dport 9100 -j ACCEPT && iptables-save > /dev/null && cd /usr/local && unlink node_exporter && rm -rf node_exporter-1.4.0.linux-amd64 && rm -rf /data/apps/node_exporter-1.4.0.linux-amd64.tar.gz > /dev/null && echo "删除node_exporter完成" sh ./node-exporter_uninstall.sh ········································································· ~prometheus节点上 vi node_join.sh #!/bin/bash #对应被监控的node名称 NODENAME=$1 #对应被监控的IP地址 IPADDRESS=$2 echo " - job_name: "$NODENAME" static_configs: - targets: ["$IPADDRESS:9100"] " | tee -a /usr/local/prometheus/prometheus.yml > /dev/null && /usr/local/prometheus/promtool check 
config /usr/local/prometheus/prometheus.yml && curl -X POST http://127.0.0.1:9090/-/reload sh ./node_join.sh NODENAME(对应被监控的node名称) IPADDRESS(对应被监控的IP地址)
自定义监控
在/usr/lib/systemd/system/node_exporter.service文件中的ExecStart项添加--collector.textfile.directory参数,监控的文件以.prom结尾
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
User=root
# A backslash continues the line only when it is the last character on it.
ExecStart=/usr/local/node_exporter/node_exporter \
    --collector.textfile.directory=/var/lib/node_exporter
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
[Install]
WantedBy=multi-user.target
cat /var/lib/node_exporter/test.prom
# HELP messages_log_test /var/log/messages test.
# TYPE messages_log_test counter
messages_log_test{topic="Aug 16 09:08:23 test-node kernel: cloudprovider invoked test: gfp_mask=0x135chb, order=0, test=996"} 1