【判断服务进程是否挂掉的Prometheus监控脚本】

通过 SSH 检查各主机上的进程是否存活,并将结果以 process_up 指标推送到 Pushgateway

shell脚本实现

#!/bin/bash
#
# Remote process liveness monitor for Prometheus.
#
# For every host/process pair configured in HOSTS, runs pgrep over SSH and
# records a `process_up` gauge sample:
#    1  -> a matching process was found
#    0  -> no matching process
#   -1  -> the check itself failed (SSH unreachable, timeout, bad output)
# The samples are then pushed to a Prometheus Pushgateway in a single request.
#
# Requires: bash 4+ (associative arrays), ssh with key-based login, curl.

set -euo pipefail

# Pushgateway settings (each can be overridden from the environment).
PUSHGATEWAY_URL="${PUSHGATEWAY_URL:-http://your-pushgateway:9091}"
JOB_NAME="${JOB_NAME:-remote_process_monitor}"

# Use a private, unpredictable temp file and always remove it on exit, so
# concurrent runs cannot clobber each other and nothing stale is left in /tmp.
TMP_METRICS_FILE="$(mktemp "${TMPDIR:-/tmp}/remote_process_monitor.XXXXXX")"
trap 'rm -f -- "$TMP_METRICS_FILE"' EXIT

# Host and process configuration (customize as needed).
# Format: HOSTS["hostname_or_ip"]="process1,process2,..."
declare -A HOSTS
HOSTS["host1"]="nginx,mysqld"
HOSTS["host2"]="redis-server"
HOSTS["host3"]="java,kafka"
HOSTS["host4"]="your_process"

# Build the metrics payload.
printf '# TYPE process_up gauge\n' >> "$TMP_METRICS_FILE"

for HOST in "${!HOSTS[@]}"; do
    # Split the list on commas (and surrounding blanks) without globbing.
    IFS=', ' read -r -a PROCESSES <<< "${HOSTS[$HOST]}"

    for PROCESS in "${PROCESSES[@]}"; do
        [[ -n "$PROCESS" ]] || continue

        # `pgrep -f` matches full command lines, and the remote shell that
        # sshd starts to run our command has the pattern in its own command
        # line. Wrapping the first character in a bracket expression
        # ("nginx" -> "[n]ginx") keeps the regex equivalent while making sure
        # it no longer matches that wrapper shell.
        PATTERN="[${PROCESS:0:1}]${PROCESS:1}"
        printf -v REMOTE_CMD 'pgrep -f %q > /dev/null && echo 1 || echo 0' "$PATTERN"

        # -n: do not read stdin; BatchMode: fail instead of prompting for a
        # password, so an unattended (cron) run can never hang on input.
        STATUS="$(ssh -n -o BatchMode=yes -o ConnectTimeout=5 "$HOST" "$REMOTE_CMD")" || STATUS=""

        # Anything other than a clean 0/1 means the check did not run; emit -1
        # rather than an empty value, which would make Pushgateway reject the
        # entire payload.
        if [[ "$STATUS" != "0" && "$STATUS" != "1" ]]; then
            echo "[ERROR] Could not check '$PROCESS' on $HOST" >&2
            STATUS="-1"
        fi

        # Append the sample to the metrics file.
        printf 'process_up{host="%s",process="%s"} %s\n' \
            "$HOST" "$PROCESS" "$STATUS" >> "$TMP_METRICS_FILE"
    done
done

# Push to the Pushgateway; --fail turns HTTP 4xx/5xx into a non-zero exit.
if ! curl --fail --silent --show-error --max-time 10 \
        --data-binary @"$TMP_METRICS_FILE" \
        "$PUSHGATEWAY_URL/metrics/job/$JOB_NAME"; then
    echo "[ERROR] Push to $PUSHGATEWAY_URL failed" >&2
    exit 1
fi

python实现

import paramiko
import requests
import socket

# Pushgateway settings
PUSHGATEWAY_URL = "http://your-pushgateway:9091"
JOB_NAME = "remote_process_monitor"

# Hosts to probe, each mapped to the list of process names that are
# checked on it with pgrep (customize as needed).
host_process_map = dict(
    [
        ("host1.domain.com", ["nginx", "mysqld"]),
        ("host2.domain.com", ["redis-server"]),
        ("host3.domain.com", ["java", "kafka"]),
        ("host4.domain.com", ["your_process"]),
    ]
)

# SSH login settings
SSH_USER = "your_user"
# Private key used for the SSH login; make sure it is readable by this script.
SSH_KEY_PATH = "/home/your_user/.ssh/id_rsa"

# Run one command on a remote host over SSH
def ssh_exec_command(host, command):
    """Execute ``command`` on ``host`` over SSH and return its stdout.

    Logs in as ``SSH_USER`` with the RSA private key at ``SSH_KEY_PATH``.

    Args:
        host: Hostname or IP address to connect to.
        command: Shell command line to run on the remote host.

    Returns:
        The command's standard output, decoded and stripped of surrounding
        whitespace. If anything fails (key loading, connection, timeout,
        execution), the error is printed and the string "-1" is returned.
    """
    client = None
    try:
        key = paramiko.RSAKey.from_private_key_file(SSH_KEY_PATH)
        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy trusts unknown host keys without
        # verification, which allows man-in-the-middle attacks; consider
        # loading known_hosts and rejecting unknown hosts instead.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(hostname=host, username=SSH_USER, pkey=key, timeout=5)

        # Bound the remote command as well, so a stuck host cannot block the
        # whole monitoring run.
        _stdin, stdout, _stderr = client.exec_command(command, timeout=10)
        return stdout.read().decode().strip()
    except Exception as e:
        print(f"[ERROR] SSH to {host} failed: {e}")
        return "-1"  # network errors, timeouts, etc. are reported as -1
    finally:
        # Close on every path; previously a failure after the client was
        # created leaked the connection.
        if client is not None:
            client.close()

# Build the Prometheus metrics payload
def build_metrics():
    """Probe every configured host/process pair and build the metrics text.

    For each entry in ``host_process_map`` a ``pgrep -f`` check is run on the
    host through ``ssh_exec_command`` and one ``process_up`` sample is emitted:
    1 if a matching process exists, 0 if not, -1 if the check did not produce
    a usable answer.

    Returns:
        The payload in Prometheus text exposition format, newline-terminated.
    """
    import shlex

    lines = ["# TYPE process_up gauge"]
    for host, processes in host_process_map.items():
        for proc in processes:
            if not proc:
                continue  # an empty pattern would match every process
            # `pgrep -f` matches full command lines, and the remote shell
            # running this command has the pattern in its own command line.
            # Bracketing the first character ("nginx" -> "[n]ginx") keeps the
            # regex equivalent but stops it from matching that wrapper shell.
            pattern = f"[{proc[0]}]{proc[1:]}"
            status_cmd = (
                f"pgrep -f {shlex.quote(pattern)} > /dev/null && echo 1 || echo 0"
            )
            status = ssh_exec_command(host, status_cmd)
            # Never emit an empty or garbage sample value: a single malformed
            # line makes the Pushgateway reject the whole payload.
            if status not in ("0", "1"):
                status = "-1"
            lines.append(f'process_up{{host="{host}",process="{proc}"}} {status}')
    return "\n".join(lines) + "\n"

# Push to the Pushgateway
def push_to_gateway(metrics_data):
    """POST the metrics payload to the Pushgateway.

    The payload is pushed under job ``JOB_NAME`` with an ``instance`` grouping
    label set to the local hostname. The outcome is printed; no exception is
    propagated to the caller.

    Args:
        metrics_data: Metrics in Prometheus text exposition format (str).
    """
    url = f"{PUSHGATEWAY_URL}/metrics/job/{JOB_NAME}/instance/{socket.gethostname()}"
    try:
        # A timeout keeps an unreachable gateway from hanging the run forever.
        response = requests.post(url, data=metrics_data.encode("utf-8"), timeout=10)
        # The Pushgateway answers 200 for a normal successful push (202 only
        # when consistency checks are disabled or on old versions), so both
        # must count as success.
        if response.status_code not in (200, 202):
            print(f"[ERROR] Push failed: {response.status_code} {response.text}")
        else:
            print("[INFO] Push success.")
    except Exception as e:
        print(f"[ERROR] Exception during push: {e}")

if __name__ == "__main__":
    # Collect the samples once, echo them for debugging, then push them.
    generated = build_metrics()
    print("[DEBUG] Generated metrics:\n", generated)
    push_to_gateway(metrics_data=generated)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

dmonstererer

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值