1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
我的博客已迁移到 xdoujiang.com,请去那边和我交流。晚上某个时段收到告警,某台服务器 load 很高,但再登录服务器查看的时候 load 可能已经下降了,所以使用 shell 脚本来解决这个问题:监控频率为每 30 秒 1 次,当系统负载超过一定数值(脚本中为 CPU 核数)时,就把当时占用 CPU 的进程信息记录到文本里。脚本内容如下(cat load.sh):
#!/bin/bash
#--------------------------------------------------
#Author:jimmygong
#Email:jimmygong@taomee.com
#FileName:load.sh
#Function:Sample the 1-minute load average every $sleeptime seconds and,
#         whenever it exceeds the number of CPUs, append a snapshot of the
#         CPU-consuming processes to $logpath/load.<YYYYmmddHHMM>.
#Usage:load.sh {start|stop}
#Version:1.0
#Created:2015-06-02
#--------------------------------------------------
set -u

readonly sleeptime=30                 # seconds between two samples
readonly logpath="/opt/scripts"       # directory holding snapshots and pidfile
readonly pidfile="$logpath/pidfile"   # PID of the background monitor loop

#######################################
# Print a green "[ Ok ]" status line.
# Arguments: $* - message text
# Outputs:   the coloured line on stdout
#######################################
echosucc() {
  local succstatus="[ Ok ]"
  # The message is passed as an argument, never as the format string, so a
  # '%' or backslash in it cannot be interpreted by printf.
  printf '\033[32m %s %s \033[0m\n' "$succstatus" "$*"
}

#######################################
# Print the command-line synopsis on stdout.
#######################################
usage() {
  echo "Usage: $0 {start|stop}"
}

#######################################
# Print the PID of the monitor recorded in $pidfile.
# Outputs: the PID on stdout when a live monitor is found
# Returns: 0 if found; 1 if the pidfile is missing, malformed, or stale
#######################################
running_pid() {
  local pid
  [[ -r "$pidfile" ]] || return 1
  pid=$(<"$pidfile")
  [[ "$pid" =~ ^[0-9]+$ ]] || return 1
  [[ -r "/proc/$pid/cmdline" ]] || return 1
  # Guard against PID reuse: the recorded process must have this script's
  # name on its command line (the background loop is a fork of this script).
  tr '\0' ' ' <"/proc/$pid/cmdline" | grep -qF -- "${0##*/}" || return 1
  printf '%s\n' "$pid"
}

#######################################
# Monitor loop; never returns. Intended to be run in the background.
# Globals: sleeptime, logpath (read)
# Outputs: a status line on stdout; snapshots appended under $logpath
#######################################
start() {
  local load cpunum result stamp
  echosucc "Starting load monitor"

  # Read the CPU count once; it is assumed not to change while the monitor
  # runs. Anchoring the pattern avoids counting other lines that merely
  # contain the word "processor".
  cpunum=$(grep -c '^processor' /proc/cpuinfo)
  [[ "$cpunum" =~ ^[1-9][0-9]*$ ]] || cpunum=1

  while true; do
    # First field of /proc/loadavg is the 1-minute load average.
    read -r load _ </proc/loadavg || load=0

    # Load is fractional, so compare in awk (bash arithmetic is integer-only).
    if awk -v cur="$load" -v max="$cpunum" 'BEGIN { exit !(cur > max) }'; then
      # Processes with non-zero CPU usage, busiest first. The ps header line
      # is dropped by the awk filter because "%CPU" does not compare > 0.
      result=$(ps -eo pcpu,pmem,user,args | awk '$1 > 0' | sort -nr)
      if [[ -n "$result" ]]; then
        stamp=$(date +"%Y%m%d%H%M")
        # Append rather than overwrite: two samples fall into each minute,
        # and the second must not destroy the first.
        {
          printf '==== %s load=%s cpus=%s ====\n' \
            "$(date '+%F %T')" "$load" "$cpunum"
          printf '%s\n' "$result"
        } >>"$logpath/load.$stamp"
      fi
    fi
    sleep "$sleeptime"
  done
}

#######################################
# Terminate the monitor recorded in $pidfile and remove the pidfile.
# Outputs: status line on stdout, or a notice on stderr if nothing runs
#######################################
stop() {
  local pid
  if pid=$(running_pid); then
    kill -s TERM "$pid" 2>/dev/null
    echosucc 'Stopping load monitor'
  else
    echo "load monitor is not running" >&2
  fi
  rm -f -- "$pidfile"
}

if [[ $# -ne 1 ]]; then
  usage
  exit 1
fi

mkdir -p -- "$logpath" || exit 1

case "$1" in
  start)
    if running=$(running_pid); then
      echo "load monitor already running (pid $running)" >&2
      exit 1
    fi
    start &
    # Record the loop's PID so that `stop` signals exactly this process.
    if ! printf '%s\n' "$!" >"$pidfile"; then
      kill -s TERM "$!" 2>/dev/null
      exit 1
    fi
    ;;
  stop)
    stop
    ;;
  *)
    usage
    exit 1
    ;;
esac
本文转自 xdoujiang 51CTO博客,原文链接:http://blog.51cto.com/7938217/1657964,如需转载请自行联系原作者