Monit-基于非容器服务自恢复程序实践

1.需求:由于历史原因和软件自身的限制,有上百台服务器及其上的服务未运行在容器中,需要在程序崩溃后自动拉起(以 Java、Python、C++ 为主)。

2.目的:无需人工干预即可快速自动恢复,要求检测频率为每 10s 一次。

3.实现方式

3.1 根据不同语言自己开发脚本实现自动拉起和通知(不够标准化-弃用)

  1 #!/bin/bash
  2 
  3 # ========== 使用说明 ==========
  4 # 1. 自定配置env: fat/prod等环境变量, nacos地址, lark webhook等
  5 # 2. 确保每个Java服务有对应的启动脚本(如 deploy-xxx.sh)
  6 # 3. 设置需要监控的Java服务列表(见下方SERVICES_PROCESS_ID等变量)
  7 # 4. 将此脚本添加到crontab中定期执行,例如每1分钟检测一次monitor是否在运行,如果在运行不会重复运行,不在运行则会后台运行:
  8 # */1 * * * * /bin/bash /path/to/this/monitor.sh >> /path/to/monitor.log 2>&1
  9 
 10 # ========== 环境变量加载 ==========
 11 # 在 crontab 中执行时,需要显式加载环境变量
 12 # 按优先级加载多个可能的环境变量文件
 13 ENV_FILES=(
 14     "/etc/profile"
 15     "/etc/bashrc"
 16     "/root/.bash_profile"
 17     "/root/.bashrc"
 18     "$HOME/.bash_profile"
 19     "$HOME/.bashrc"
 20 )
 21 
 22 for env_file in "${ENV_FILES[@]}"; do
 23     if [ -f "$env_file" ]; then
 24         echo "[$(date '+%Y-%m-%d %H:%M:%S')] 加载环境变量文件: $env_file" >> /tmp/monitor_env.log
 25         source "$env_file"
 26     fi
 27 done
 28 
 29 # 如果上述文件都没有加载到关键环境变量,则手动设置
 30 if [ -z "$JAVA_HOME" ]; then
 31     export JAVA_HOME="/opt/jdk-17.0.8"
 32     export CLASSPATH=".:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar"
 33     export PATH="$JAVA_HOME/bin:$PATH"
 34     echo "[$(date '+%Y-%m-%d %H:%M:%S')] 手动设置 JAVA_HOME: $JAVA_HOME" >> /tmp/monitor_env.log
 35 fi
 36 
 37 # 设置必要的环境变量
 38 export env_nacos_address="nacos.test.com:8848"
 39 export env_nacos_namespace="fat"
 40 export ENV="fat"
 41 
 42 # 记录环境变量状态(用于调试)
 43 {
 44     echo "=== 环境变量检查 ==="
 45     echo "时间: $(date)"
 46     echo "JAVA_HOME: $JAVA_HOME"
 47     echo "PATH: $PATH"
 48     echo "ENV: $ENV"
 49     echo "env_nacos_address: $env_nacos_address"
 50     which java
 51     java -version 2>&1
 52     echo "=== 环境变量检查结束 ==="
 53 } >> /tmp/monitor_env.log 2>&1
 54 
 55 # ========== 脚本配置 ==========
 56 target_folder="./"
 57 
 58 LARK_WEBHOOK="https://open.larksuite.com/open-apis/bot/v2/hook/token"
 59 LARK_LOG_FILE="$target_folder/lark.log"
 60 MONITOR_LOG_FILE="/tmp/check.logs"
 61 
 62 LARK_MAX_RETRY=3
 63 LARK_RETRY_DELAY=10
 64 LARK_ENV="fat"
 65 
 66 # ========== 文件锁机制 ==========
 67 LOCK_FILE="/tmp/service_monitor.lock"
 68 
 69 # 尝试获取锁,如果失败则退出
 70 exec 200>"$LOCK_FILE"
 71 flock -n 200 || {
 72     echo "[$(date '+%Y-%m-%d %H:%M:%S')] 监控脚本已在运行,退出本次执行 (PID: $(cat $LOCK_FILE 2>/dev/null || echo '未知'))" >> "$MONITOR_LOG_FILE"
 73     exit 0
 74 }
 75 
 76 # 将当前PID写入锁文件
 77 echo $$ > "$LOCK_FILE"
 78 
 79 # 设置退出时清理锁文件
 80 trap "rm -f $LOCK_FILE; exit" INT TERM EXIT
 81 
 82 echo "[$(date '+%Y-%m-%d %H:%M:%S')] 启动服务监控进程 (PID: $$)" | tee -a "$MONITOR_LOG_FILE"
 83 
 84 # ========== Lark 报警函数 ==========
 85 send_lark_alert() {
 86     local message="$1"
 87     local timestamp=$(date "+%Y-%m-%d %H:%M:%S")
 88     
 89     local full_message="[$LARK_ENV][进程监控] $message\n"
 90     full_message+="服务器名: $(hostname)\n"
 91     full_message+="报警时间: $timestamp\n"
 92     #full_message+="<at id=ou_id1>Jame Mei</at>"
 93     #full_message+="<at id=ou_id2>Levi Li</at>"
 94     #full_message+="<at id=ou_id3>Bingbing Sun</at>"
 95     
 96     local json_data="{
 97         \"msg_type\": \"interactive\",
 98         \"card\": {
 99             \"elements\": [{
100                 \"tag\": \"div\",
101                 \"text\": {
102                     \"content\": \"$full_message\",
103                     \"tag\": \"lark_md\"
104                 }
105             }]
106         }
107     }"
108     
109     local attempt=1
110     while [[ $attempt -le $LARK_MAX_RETRY ]]; do
111         http_status=$(curl -s -o /dev/null -w "%{http_code}" \
112             -m 3 \
113             -X POST "$LARK_WEBHOOK" \
114             -H "Content-Type: application/json" \
115             -d "$json_data")
116         
117         if [[ $http_status -eq 200 ]]; then
118             echo "[$(date "+%F %T")] Lark报警发送成功" >> "$LARK_LOG_FILE"
119             return 0
120         else
121             sleep $LARK_RETRY_DELAY
122             ((attempt++))
123         fi
124     done
125     echo "[$(date "+%F %T")] 错误: Lark报警发送失败,已达最大重试次数: $attempt" >> "$LARK_LOG_FILE"
126     return 1
127 }
128 
# Fire-and-forget wrapper around send_lark_alert: the alert is sent from a
# background job with stdout and stderr discarded, so the caller does not wait
# for the webhook request or its retries.
# Arguments: $1 - alert message, passed through unchanged
send_alert_async() {
    send_lark_alert "$1" > /dev/null 2>&1 &
}
134 
# ========== Service table ==========
# Directory the start commands are executed from.
WORK_DIR="/data/scripts"

# Register one monitored service in the three parallel arrays.
# Arguments: $1 - array index
#            $2 - process identifier (pattern looked up in the process list)
#            $3 - friendly name used in logs and alerts
#            $4 - start command, run from WORK_DIR
_register_service() {
    SERVICES_PROCESS_ID[$1]="$2"
    SERVICES_FRIENDLY_NAME[$1]="$3"
    SERVICES_START_CMD[$1]="$4"
}

# Disabled entry, kept for reference:
#_register_service 0 "pb-trading-engine-1.0-SNAPSHOT.jar" "交易引擎 (Trading Engine)" "./deploy-engine-new.sh start"

_register_service 1  "pb-trading-market-1.0-SNAPSHOT.jar"               "行情服务 (Trading Market)"        "./pb-trading-market.sh start"
_register_service 2  "rapidx-trading-query-realtime-1.0-SNAPSHOT.jar"   "实时查询 (Query Realtime)"        "./deploy-realtime-new.sh start"
_register_service 3  "router-server-1.0.0-SNAPSHOT.jar"                 "路由服务 (Router Server)"         "./router-server.sh start"
_register_service 4  "pb-trading-query-1.0-SNAPSHOT.jar"                "交易查询 (Trading Query)"         "./pb-trading-query.sh start"
_register_service 5  "rapidx-trading-market-gateway-1.0-SNAPSHOT.jar"   "行情网关 (Market Gateway)"        "./rapidx-trading-market-gateway.sh start"
_register_service 6  "ltp-ems-1.0.0-master-SNAPSHOT.jar"                "订单管理 (EMS)"                   "./deploy-ems-new.sh start"
_register_service 7  "pb-trading-push-0.0.1-SNAPSHOT.jar"               "推送服务 (Trading Push)"          "./pb-trading-push.sh start"
_register_service 8  "pb-trading-gateway-1.0-SNAPSHOT.jar"              "交易网关 (Trading Gateway)"       "./pb-trading-gateway.sh start"
_register_service 9  "exchange-data-server-1.0.0-SNAPSHOT.jar"          "exchange-data-server"             "./ltp-exchange-data-server.sh start"
_register_service 10 "rapidtrade-storage-1.0-SNAPSHOT.jar"              "rapidtrade-storage"               "./rapidtrade-storage.sh start"
_register_service 11 "rapidx-trading-algo-server-1.0-SNAPSHOT.jar"      "rapidx-trading-algo-server"       "./rapidx-trading-algo-server.sh start"
_register_service 12 "rapidtrade-mock-1.0-SNAPSHOT.jar"                 "rapidtrade-mock"                  "./rapidtrade-mock.sh start"
_register_service 13 "rapidx-ws-simulator-1.0-SNAPSHOT.jar"             "rapidx-ws-simulator"              "./rapidx-ws-simulator.sh start"
_register_service 14 "pb-trading-statistics-1.0-SNAPSHOT.jar"           "pb-trading-statistics"            "./pb-trading-statistics.sh start"
_register_service 15 "rapidx-trading-onezero-maker-1.0-SNAPSHOT.jar"    "rapidx-trading-onezero-maker"     "./rapidx-trading-onezero-maker.sh start"
_register_service 16 "pb-trading-transfer-1.0-SNAPSHOT.jar"             "pb-trading-transfer"              "./pb-trading-transfer.sh start"
_register_service 17 "rapidx-trading-clearing-1.0-SNAPSHOT.jar"         "rapidx-trading-clearing"          "./rapidx-trading-clearing.sh start"
_register_service 18 "pb-trading-monitor-1.0-SNAPSHOT.jar"              "pb-trading-monitor"               "./pb-trading-monitor.sh start"
_register_service 19 "rapidx-trading-query-persistent-1.0-SNAPSHOT.jar" "rapidx-trading-query-persistent"  "./deploy-persistent-new.sh start"

# Index 20 used to repeat the rapidtrade-storage entry of index 10. A service
# listed twice is restarted twice and alerted twice in the same pass, so the
# duplicate is dropped; the index is simply left unused.

# NOTE(review): unlike every other entry this start command has no "./" prefix
# and no "start" argument, so it is resolved through PATH — confirm it is intended.
_register_service 21 "bitu-trade-1.0-SNAPSHOT.jar"                      "bitu-trade"                       "bitu-trade.sh"
#SERVICES_START_CMD[21]="./deploy.sh start"

_register_service 22 "ltp-data-integration-1.0-SNAPSHOT.jar"            "ltp-data-integration"             "./ltp-data-integration.sh start"
_register_service 23 "ingest-server-app-1.0-SNAPSHOT.jar"               "ingest-server-app"                "./data-ingest-server.sh start"
_register_service 24 "ltp-data-visual-1.0-SNAPSHOT.jar"                 "ltp-data-visual"                  "./data-cam-visual.sh start"

240 
# ========== Main loop ==========
# Every 10 seconds, walk the service table; any service whose process
# identifier is not found in the process list is restarted in the background
# and an alert is sent.
mkdir -p "$(dirname "$MONITOR_LOG_FILE")"

while true; do
    for i in "${!SERVICES_PROCESS_ID[@]}"; do
        process_id="${SERVICES_PROCESS_ID[$i]}"
        friendly_name="${SERVICES_FRIENDLY_NAME[$i]}"
        start_cmd="${SERVICES_START_CMD[$i]}"

        # pgrep -f matches the pattern against the full command line.
        if pgrep -f -- "$process_id" > /dev/null; then
            echo "[$(date '+%Y-%m-%d %H:%M:%S')] - 服务 [$friendly_name] 运行正常." >> "$MONITOR_LOG_FILE"
            continue
        fi

        echo "[$(date '+%Y-%m-%d %H:%M:%S')] - 服务 [$friendly_name] (进程标识: $process_id) 未运行,正在重启..." | tee -a "$MONITOR_LOG_FILE"

        # Start the service from a background subshell so the loop keeps going.
        (
            # Drop the monitor's lock descriptor. A service that inherits fd 200
            # would keep the flock alive after the monitor itself has died, and
            # no new monitor instance could then acquire the lock.
            exec 200>&-

            # Reload the profile files so the start script sees a full environment.
            for env_file in "${ENV_FILES[@]}"; do
                if [ -f "$env_file" ]; then
                    source "$env_file"
                fi
            done

            # NOTE(review): unlike the fallback at the top of the script, these
            # values are forced even when the profile files already define them —
            # confirm that overriding JAVA_HOME here is intended.
            export JAVA_HOME="/opt/jdk-17.0.8"
            export CLASSPATH=".:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar"
            export PATH="$JAVA_HOME/bin:$PATH"
            export env_nacos_address="nacos.test.com:8848"
            export env_nacos_namespace="fat"
            export ENV="fat"

            # The start commands are relative paths: never run them from the wrong directory.
            if ! cd "$WORK_DIR"; then
                echo "[$(date '+%Y-%m-%d %H:%M:%S')] - 无法进入启动目录: $WORK_DIR" >> "$MONITOR_LOG_FILE"
                exit 1
            fi
            echo "[$(date '+%Y-%m-%d %H:%M:%S')] - 启动目录: $(pwd), JAVA_HOME: $JAVA_HOME" >> "$MONITOR_LOG_FILE"
            # Deliberately unquoted: the entry holds "<script> <argument>" and must be word-split.
            $start_cmd >> "$MONITOR_LOG_FILE" 2>&1
        ) &

        # The alert job must not hold the lock descriptor either.
        send_alert_async "服务 [$friendly_name] (进程标识: $process_id) 已停止运行!正在尝试重启。" 200>&-
    done

    sleep 10
done
View Code

 

 

3.2 使用开源通用软件统一维护自动拉起(开发+运维都可以简单维护和使用)        

 1 #程序安装
 2 dnf install -y gcc make openssl-devel bison flex zlib-devel
 3 #apt install -y gcc make libssl-dev bison flex zlib1g-dev
 4 #yum install -y gcc make openssl-devel bison flex zlib-devel
 5 wget https://mmonit.com/monit/dist/monit-5.34.0.tar.gz
 6 tar xf monit-5.34.0.tar.gz && cd monit-5.34.0/
 7 ./configure --prefix=/usr/local/monit --without-pam && make && make install
 8 mkdir /usr/local/monit/etc -p && mkdir -p /usr/local/monit/etc/
 9 cp monitrc /usr/local/monit/etc/
10 chmod 600 /usr/local/monit/etc/monitrc #配置文件定义检测时间,检测配置文件
11 ln -s /usr/local/monit/bin/monit /usr/sbin/monit
12 monit --version
13 mkdir /etc/monit/conf.d/ -p #所有进程检测配置文件,如启动脚本变动修改这里即可
14 cp /usr/local/monit/bin/monit  /usr/bin/
15 
16 
# systemd unit: save as /etc/systemd/system/monit.service
[Unit]
Description=Monit process monitor
Documentation=https://mmonit.com/monit/
After=network.target

[Service]
Type=forking
ExecStart=/usr/bin/monit -c /usr/local/monit/etc/monitrc
ExecReload=/usr/bin/monit -c /usr/local/monit/etc/monitrc reload
ExecStop=/usr/bin/monit -c /usr/local/monit/etc/monitrc quit
PIDFile=/var/run/monit.pid
Restart=on-failure
User=root
Group=root

[Install]
# Start at boot in multi-user mode. The comment sits on its own line because
# unit files have no trailing comments: text after the value becomes part of it.
WantedBy=multi-user.target
35 
# Make systemd read the newly created unit file, then enable monit at boot and
# start it now. (`systemctl reload monit` would ask a running monit to reload
# its own configuration; it does not load a new unit file.)
systemctl daemon-reload
systemctl enable monit
systemctl start monit
3.2.1程序安装.sh

 

 1 #进程配置方式vim /usr/local/monit/etc/monitrc:
 2 set daemon 10
 3 set logfile /var/log/monit.log
 4 
 5 # 服务配置 /etc/monit/conf.d/rapidtrade-mock.conf
 6 check process rapidtrade_mock matching "rapidtrade-mock"
 7     start program = "/data/scripts/rapidtrade-mock.sh start"
 8     stop program = "/data/scripts/rapidtrade-mock.sh stop"
 9     if does not exist then start
10 
11 
12 
13 #port配置方式:
14  check host rapidtrade_mock with address 127.0.0.1
15     if failed 
16         port 7040 
17         type tcp 
18         timeout 5 seconds 
19         for 2 cycles
20     then start
21     start program = "/data/scripts/rapidtrade-mock.sh start" as uid root and gid root
22     stop program = "/data/scripts/rapidtrade-mock.sh stop" as uid root and gid root
23     if 3 restarts within 5 cycles then timeout
24 
25 
26 
27 
28 #健康监测端口+路径:经过测试有问题,无法启动服务并恢复正常。
29 check host my_web_service with address 127.0.0.1
30     if failed
31         port 80
32         protocol http
33         request "/actuator/prometheus" # 指定要检查的健康检查端点路径
34         with timeout 10 seconds
35         for 3 cycles
36     then restart
37     start program = "/usr/bin/systemctl start my-service"
38     stop program = "/usr/bin/systemctl stop my-service"
3.2.2 配置使用

 

 

4.测试和使用

http://www.cnblogs.com/Jame-mei
Delphi 12.3 作为一款面向 Windows 平台的集成开发环境,由 Embarcadero Technologies 负责其持续演进。该环境以 Object Pascal 语言为核心,并依托 Visual Component Library(VCL)框架,广泛应用于各类桌面软件、数据库系统及企业级解决方案的开发。在此生态中,Excel4Delphi 作为一个重要的社区开源项目,致力于搭建 Delphi 与 Microsoft Excel 之间的高效桥梁,使开发者能够在自研程序中直接调用 Excel 的文档处理、工作表管理、单元格操作及宏执行等功能。 该项目以库文件与组件包的形式提供,开发者将其集成至 Delphi 工程后,即可通过封装良好的接口实现对 Excel 的编程控制。具体功能涵盖创建与编辑工作簿、格式化单元格、批量导入导出数据,乃至执行内置公式与宏指令等高级操作。这一机制显著降低了在财务分析、报表自动生成、数据整理等场景中实现 Excel 功能集成的技术门槛,使开发者无需深入掌握 COM 编程或 Excel 底层 API 即可完成复杂任务。 使用 Excel4Delphi 需具备基础的 Delphi 编程知识,并对 Excel 对象模型有一定理解。实践中需注意不同 Excel 版本间的兼容性,并严格遵循项目文档进行环境配置与依赖部署。此外,操作过程中应遵循文件访问的最佳实践,例如确保目标文件未被独占锁定,并实施完整的异常处理机制,以防数据损毁或程序意外中断。 该项目的持续维护依赖于 Delphi 开发者社区的集体贡献,通过定期更新以适配新版开发环境与 Office 套件,并修复已发现的问题。对于需要深度融合 Excel 功能的 Delphi 应用而言,Excel4Delphi 提供了经过充分测试的可靠代码基础,使开发团队能更专注于业务逻辑与用户体验的优化,从而提升整体开发效率与软件质量。 资源来源于网络分享,仅用于学习交流使用,请勿用于商业,如有侵权请联系我删除!
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值