进程检测及资源限制自愈

这是一个Python脚本,用于监控系统资源(CPU、内存)和服务状态。当进程或服务的CPU、内存使用率达到预设阈值时,脚本会执行重启操作。它适用于单个服务多进程场景,通过`top`命令获取资源利用率,并使用`systemctl`管理服务。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

#!/usr/bin/python3
# encoding: utf-8
#filename: process-check-self-healing.py
#author: gaohaixiang
#writetime:202206141535

import re
import time
import subprocess


"""
# 使用注意事项:
涉及多项检测及自愈功能,按实际需求更改脚本

连续多次监测到cpu,内存大于某个值,重启
或者检测到服务不是active,重启服务,根据实际情况进行定制

假如一个服务有多个进程,则将这些进程的使用率相加,所得总和作为该服务的使用率

此脚本适用一个服务多个进程,一个服务一个进程
不适合几个服务几个进程,否则会将多个服务全部杀死重启

脚本中进程与服务区别:
进程需要使用命令进行杀死及启动
服务可以使用 systemctl 命令停止及启动,如 systemctl restart httpd

top取值说明:
top -d 2 -n 3
取值三次,每次间隔2秒;将三次累加得到的mem和cpu值除以3,得到3次取值的平均值

"""

# Run a top command line and capture what it prints.
def TOP_monitor(topCMD):
    """Execute ``topCMD`` through the shell and return its captured output.

    Args:
        topCMD: Complete command line to run, e.g. ``"top -d 2 -n 3"``.

    Returns:
        The text reported by ``subprocess.getoutput`` for that command,
        as a single string.
    """
    return subprocess.getoutput(topCMD)

# Extract the memory usage of a process from captured top output.
def TOP_MEM_monitor(topCMDresult, PRO_NAME):
    """Sum the memory-percentage column of every top line ending in PRO_NAME.

    A line is counted when, after stripping surrounding whitespace, it ends
    with ``PRO_NAME`` (plain string comparison, so names containing regex
    metacharacters such as ``c++`` are handled).  The value is read from
    whitespace-separated column index 9, which in the layout this script
    expects (PID, user, PR, NI, VIRT, RES, SHR, state, %CPU, %MEM, TIME+,
    command) is ``%MEM``.

    Lines that end with the name but are not process rows (too few columns,
    or a non-numeric value in that column) are skipped instead of raising.

    Args:
        topCMDresult: Text captured from a top run; may hold several frames.
        PRO_NAME: Process name to look for at the end of each line.

    Returns:
        The float sum of the matched values over all lines, ``0.0`` when
        nothing matched.  With several frames in the input the caller has
        to divide by the frame count to get an average.
    """
    mem_column = 9
    mem_total = 0.0
    for row in topCMDresult.split("\n"):
        if not row.strip().endswith(PRO_NAME):
            continue
        columns = row.split()
        if len(columns) <= mem_column:
            # Ends with the name but is not a full process row.
            continue
        try:
            # Some locales print the decimal separator as a comma.
            mem_total += float(columns[mem_column].replace(",", "."))
        except ValueError:
            # e.g. the column header row when PRO_NAME is "COMMAND".
            continue
    return mem_total

# Extract the CPU usage of a process from captured top output.
def TOP_CPU_monitor(topCMDresult, PRO_NAME):
    """Sum the CPU-percentage column of every top line ending in PRO_NAME.

    A line is counted when, after stripping surrounding whitespace, it ends
    with ``PRO_NAME`` (plain string comparison, so names containing regex
    metacharacters such as ``c++`` are handled).  The value is read from
    whitespace-separated column index 8, which in the layout this script
    expects (PID, user, PR, NI, VIRT, RES, SHR, state, %CPU, %MEM, TIME+,
    command) is ``%CPU``.

    Lines that end with the name but are not process rows (too few columns,
    or a non-numeric value in that column) are skipped instead of raising.

    Args:
        topCMDresult: Text captured from a top run; may hold several frames.
        PRO_NAME: Process name to look for at the end of each line.

    Returns:
        The float sum of the matched values over all lines, ``0.0`` when
        nothing matched.  With several frames in the input the caller has
        to divide by the frame count to get an average.
    """
    cpu_column = 8
    cpu_total = 0.0
    for row in topCMDresult.split("\n"):
        if not row.strip().endswith(PRO_NAME):
            continue
        columns = row.split()
        if len(columns) <= cpu_column:
            # Ends with the name but is not a full process row.
            continue
        try:
            # Some locales print the decimal separator as a comma.
            cpu_total += float(columns[cpu_column].replace(",", "."))
        except ValueError:
            # e.g. a header or summary row that happens to end with the name.
            continue
    return cpu_total

# Count the processes that currently match a name.
def PRO_NUM_check(PRO_NAME):
    """Return how many ``ps -ef`` lines mention ``PRO_NAME``.

    The count comes from ``ps -ef | grep NAME | grep -v grep | wc -l``, so
    every process whose ``ps -ef`` line contains the name is counted, and
    any line containing the word ``grep`` is excluded.

    Only the pipeline's standard output is parsed.  Diagnostics written to
    stderr by any stage are discarded so they cannot end up in the text
    handed to ``int()``.

    Args:
        PRO_NAME: Text to search for in the process list; it is inserted
            into the shell command unquoted.

    Returns:
        The number of matching lines as an ``int``.

    Raises:
        ValueError: If the pipeline prints nothing numeric on stdout.
    """
    count_cmd = "ps -ef |grep %s |grep -v grep | wc -l" % PRO_NAME
    completed = subprocess.run(
        count_cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        universal_newlines=True,
    )
    return int(completed.stdout.strip())

# Query the current systemd state of a service.
def PRO_system_check(PRO_NAME):
    """Return the second field of the ``Active:`` line of ``systemctl status``.

    Args:
        PRO_NAME: Unit name handed to ``systemctl status``.

    Returns:
        Whatever the shell pipeline printed, as reported by
        ``subprocess.getoutput``.  For a running unit this is expected to be
        ``"active"`` (the caller compares against that value).
    """
    status_pipeline = "systemctl status %s |grep 'Active:'|awk '{print $2}'"
    return subprocess.getoutput(status_pipeline % PRO_NAME)

# Start a process with an arbitrary shell command.
def PRO_CMD_start(PRO_CMD_start_cmd):
    """Run the start command in a shell and report how it went.

    Args:
        PRO_CMD_start_cmd: Complete shell command that starts the process.

    Returns:
        A 2-tuple in the order produced by ``subprocess.getstatusoutput``:
        ``(exit_status, captured_output)``.
    """
    exit_status, captured_output = subprocess.getstatusoutput(PRO_CMD_start_cmd)
    return exit_status, captured_output

# Restart a service through systemd.
def PRO_system_restart(PRO_NAME):
    """Run ``systemctl restart`` for the given unit.

    Args:
        PRO_NAME: Unit name handed to ``systemctl restart``.

    Returns:
        A 2-tuple in the order produced by ``subprocess.getstatusoutput``:
        ``(exit_status, captured_output)``.  Note this is not the unit
        state; use ``systemctl status`` for that.
    """
    restart_cmd = "systemctl restart %s " % PRO_NAME
    exit_status, captured_output = subprocess.getstatusoutput(restart_cmd)
    return exit_status, captured_output

# Kill every process that matches a name.
def PRO_CMD_kill(PRO_NAME):
    """Send ``kill -9`` to all PIDs whose ``ps -ef`` line mentions ``PRO_NAME``.

    PIDs are taken from the second column of every ``ps -ef`` line that
    contains the name and does not contain the word ``grep``, then passed
    to ``kill -9`` through ``xargs``.

    Args:
        PRO_NAME: Text to search for in the process list.

    Returns:
        A 2-tuple in the order produced by ``subprocess.getstatusoutput``:
        ``(exit_status, captured_output)`` of the whole pipeline.
    """
    kill_pipeline = (
        "ps -ef |grep %s|grep -v grep|awk '{print $2}'|xargs kill -9" % PRO_NAME
    )
    exit_status, captured_output = subprocess.getstatusoutput(kill_pipeline)
    return exit_status, captured_output

# Count the logical CPUs of the machine.
def GET_CPU_NUM_total():
    """Return the number of ``processor`` lines found in ``/proc/cpuinfo``.

    Returns:
        The output of the counting pipeline as a string (not an int);
        callers convert it themselves.
    """
    return subprocess.getoutput("cat /proc/cpuinfo |grep processor|wc -l")

# Read the total amount of system memory.
def GET_MEM_NUM_total():
    """Return the value of the ``MemTotal:`` line of ``/proc/meminfo``.

    Returns:
        The second field of that line as a string, exactly as printed by
        the pipeline (the unit is whatever ``/proc/meminfo`` reports).
    """
    return subprocess.getoutput(
        "cat /proc/meminfo |grep 'MemTotal:'|awk '{print $2}'"
    )

def _restart_until_service_active(service_name, retry_interval):
    """Restart the service until ``PRO_system_check`` reports ``active``.

    The state is checked first, so a service that is already active is left
    alone.  There is no retry limit: the loop only ends once the state is
    ``active``.
    """
    while PRO_system_check(service_name) != 'active':
        PRO_system_restart(service_name)
        # Pause between attempts instead of restarting in a tight loop.
        time.sleep(retry_interval)


def _start_until_process_count(process_name, wanted, start_cmd, retry_interval):
    """Run the start command until at least ``wanted`` processes exist.

    The count is checked first, so nothing is started when enough processes
    are already running.  There is no retry limit.
    """
    while PRO_NUM_check(process_name) < wanted:
        PRO_CMD_start(start_cmd)
        # Pause between attempts instead of starting in a tight loop.
        time.sleep(retry_interval)


def _recover(process_name, wanted, start_cmd, via_systemctl, retry_interval):
    """Restart an over-limit service, either via systemctl or kill + start."""
    if via_systemctl:
        PRO_system_restart(process_name)
        _restart_until_service_active(process_name, retry_interval)
    else:
        PRO_CMD_kill(process_name)
        _start_until_process_count(
            process_name, wanted, start_cmd, retry_interval)


if __name__ == '__main__':
    starttime = time.time()
    print ("Process is running...")

    # ---- configuration -------------------------------------------------
    # Number of frames top prints; the summed readings are divided by this
    # to get an average, so it must match the -n argument below.
    TOP_SAMPLES = 3
    # -b (batch mode) makes top emit plain text when its output is captured;
    # -d 2 waits two seconds between frames.
    topCMD = "top -b -d 2 -n %d" % TOP_SAMPLES
    # Process / service name to watch.
    PRO_NAME = "httpd"
    # Minimum number of processes that must be running.
    pro_num = 1
    # Command used to start the process (replace with any other command).
    PRO_CMD_start_cmd = "systemctl start httpd"
    # Maximum share of the machine's total CPU the service may use (0..1).
    Limit_cpu_used_total = 0.9
    # Maximum share of the machine's total memory the service may use (0..1).
    Limit_mem_used_total = 0.9
    # True: recover with "systemctl restart".
    # False: recover by killing the processes and running PRO_CMD_start_cmd.
    RESTART_VIA_SYSTEMCTL = True
    # Seconds to wait between two recovery attempts.
    RETRY_INTERVAL = 2

    # ---- service state: restart until systemd reports "active" ---------
    _restart_until_service_active(PRO_NAME, RETRY_INTERVAL)

    # ---- process count: start until enough processes are running -------
    _start_until_process_count(
        PRO_NAME, pro_num, PRO_CMD_start_cmd, RETRY_INTERVAL)

    # ---- CPU: restart when the service exceeds its share of all CPUs ---
    topCMDresult = TOP_monitor(topCMD)
    # Average the per-frame sums; top reports per-process CPU where 100
    # stands for one fully used CPU, hence the cpu_count * 100 scale.
    PRO_CPU_imformations = TOP_CPU_monitor(topCMDresult, PRO_NAME) / TOP_SAMPLES
    # Fall back to 1 so a failed CPU count cannot cause a division by zero.
    cpu_num_output = int(GET_CPU_NUM_total()) or 1
    if PRO_CPU_imformations / (cpu_num_output * 100) > Limit_cpu_used_total:
        _recover(PRO_NAME, pro_num, PRO_CMD_start_cmd,
                 RESTART_VIA_SYSTEMCTL, RETRY_INTERVAL)

    # ---- memory: restart when the service exceeds its share of RAM -----
    # Sample again so the reading reflects the state after a CPU restart.
    topCMDresult = TOP_monitor(topCMD)
    # %MEM is already a percentage of total memory (0..100); convert the
    # averaged value to a 0..1 fraction before comparing with the limit.
    PRO_MEM_imformations = TOP_MEM_monitor(topCMDresult, PRO_NAME) / TOP_SAMPLES
    if PRO_MEM_imformations / 100 > Limit_mem_used_total:
        _recover(PRO_NAME, pro_num, PRO_CMD_start_cmd,
                 RESTART_VIA_SYSTEMCTL, RETRY_INTERVAL)

    endtime = time.time()
    print (endtime-starttime)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值