遇到一块 ESP32 DevKit 经常隔几天就死机一次,起初以为是代码有问题把程序跑死了,于是从 NTP 时间同步入手,逐步扩展出一个基础检测类,用来测试这块单片机是否真的存在硬件问题。要做到稳定运行几年不死机,“时间、重启、日志”是三要素。
1、NTP时间同步RTC准确
2、看门狗WDT异常重启
3、指定时间周期性重启
4、简单日志记录1000行txt文件
5、重启后需要有变量记忆的持久化json文件
初步测试发现2个问题:
1> 整点第0分钟内多次重启:machine.reset()会清空重启前的运行时变量,程序不记得本小时已经重启过,需要把该变量持久化到json文件来解决;
2> 日志保留最近约1000行就足够了,不能让文件无限增长;

修改后整点重启一次正常:

std_ntp.py:
"""
标准库 std_ntp.py 20250924
"""
from machine import RTC
import machine, network, ntptime, time, json
class SystemMonitor:
    """Basic stability monitor for an ESP32 running MicroPython.

    Responsibilities:
      * set the RTC from an NTP server at start-up (``sync_time``);
      * reboot the board on the hour, optionally only at one given hour
        (``scheduled_reboot``), remembering the last reboot hour in a JSON
        file so the memory survives ``machine.reset()``;
      * append reboot reasons to a size-bounded text log
        (``log_reboot_reason``);
      * run a simple memory / Wi-Fi health check (``check_system_health``).
    """

    # JSON file holding {"hour": <int>} for the most recent scheduled reboot.
    STATE_FILE = "last_reboot.json"
    # Plain-text reboot log and its size limits.
    LOG_FILE = "reboot_log.txt"
    MAX_LOG_LINES = 1000   # trim once the log has more lines than this
    LOG_TRIM_LINES = 100   # number of oldest lines dropped per trim
    # Health-check thresholds.
    MIN_FREE_MEM = 10000          # bytes reported by gc.mem_free()
    MAX_CONNECTION_RETRIES = 10   # consecutive failed Wi-Fi checks tolerated
    # Offset used only for human-readable output (Beijing time, UTC+8).
    UTC_OFFSET_HOURS = 8

    def __init__(self, ntp_host="172.16.30.79"):
        """Restore persisted state and synchronise the RTC.

        Args:
            ntp_host: NTP server assigned to ``ntptime.host``. The default
                is the intranet server the original script used; the
                original author also noted "ntp.aliyun.com" as an
                alternative to the library default.
        """
        ntptime.host = ntp_host
        # Consecutive failed network checks seen by check_system_health().
        self.connection_retries = 0
        # Hour of the last scheduled reboot; -1 means "none recorded".
        # load_last_reboot_hour() never raises, so no try/except is needed.
        self.last_hour = self.load_last_reboot_hour()
        self.sync_time()

    def sync_time(self):
        """Set the RTC from the configured NTP server.

        Returns:
            True when ``ntptime.settime()`` succeeded, False otherwise.
            Failures are printed, never raised.
        """
        try:
            ntptime.settime()
        except Exception as e:
            print("时间同步失败:", e)
            return False
        print("时间同步成功")
        return True

    def load_last_reboot_hour(self):
        """Return the persisted reboot hour, or -1 if none can be read.

        -1 is returned when the state file is missing, unreadable, not
        valid JSON, not a JSON object, or has no "hour" key.
        """
        try:
            with open(self.STATE_FILE, "r") as f:
                data = json.load(f)
        except Exception:
            # Best effort: a missing or corrupt file means "no memory yet".
            return -1
        if not isinstance(data, dict):
            return -1
        return data.get("hour", -1)

    def save_last_reboot_hour(self, hour):
        """Persist ``hour`` to the state file; failures are only printed."""
        try:
            with open(self.STATE_FILE, "w") as f:
                json.dump({"hour": hour}, f)
        except Exception as e:
            print("保存重启时间失败:", e)

    def scheduled_reboot(self, target_hour=None):
        """Print the current time and reboot once at the top of the hour.

        Intended to be polled about once per second. A reboot happens when
        the RTC minute is 0, this hour has not already triggered a reboot,
        and ``target_hour`` is either None or equal to the current hour.

        Args:
            target_hour: Reboot only at this hour, or at every full hour
                when None. NOTE(review): it is compared against the raw
                ``time.localtime()`` hour (UTC after an NTP sync, per the
                original comments), while the printed and logged messages
                show that hour shifted by +8 -- confirm which one callers
                expect.

        Any exception is caught and printed so the polling loop keeps
        running.
        """
        try:
            t = time.localtime()
            hour, minute = t[3], t[4]

            # Display only: shift the timestamp by the UTC offset.
            shifted = time.localtime(
                time.mktime(t) + self.UTC_OFFSET_HOURS * 3600
            )
            print(
                "{}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}".format(*shifted[:6])
            )

            if minute != 0:
                # Outside the reboot minute: forget the remembered hour (in
                # memory only). Without this, a fixed target_hour would
                # equal the persisted hour forever and the daily reboot
                # would fire just once.
                self.last_hour = -1
                return
            if hour == self.last_hour:
                # Already rebooted during this minute 0 (value restored
                # from the state file after the reset).
                return
            if target_hour is not None and hour != target_hour:
                return

            # Persist first: runtime variables do not survive the reset.
            self.save_last_reboot_hour(hour)
            local_hour = (hour + self.UTC_OFFSET_HOURS) % 24
            print(f"到达整点 {local_hour}:00,执行计划重启")
            self.log_reboot_reason(f"计划重启于 {local_hour}:00")
            time.sleep(3)
            machine.reset()
        except Exception as e:
            print("时间检查失败:", e)

    def log_reboot_reason(self, reason):
        """Append a timestamped ``reason`` line to the log and bound its size.

        The timestamp is the unshifted ``time.localtime()`` value. When the
        log exceeds ``MAX_LOG_LINES`` lines, the oldest ``LOG_TRIM_LINES``
        lines are dropped and a note recording the trim is appended.
        Failures are printed, never raised.
        """
        try:
            t = time.localtime()
            with open(self.LOG_FILE, "a") as f:
                f.write(
                    f"{t[0]}-{t[1]:02d}-{t[2]:02d} "
                    f"{t[3]:02d}:{t[4]:02d}:{t[5]:02d} - {reason}\n"
                )

            with open(self.LOG_FILE, "r") as f:
                lines = f.readlines()
            if len(lines) <= self.MAX_LOG_LINES:
                return

            lines = lines[self.LOG_TRIM_LINES:]
            with open(self.LOG_FILE, "w") as f:
                # One write() per line: avoids relying on writelines(),
                # which MicroPython file objects may not provide.
                for line in lines:
                    f.write(line)
                f.write(
                    f"日志文件已修剪,删除前{self.LOG_TRIM_LINES}行,"
                    f"保留{len(lines)}行\n"
                )
        except Exception as e:
            print(f"记录日志失败: {e}")

    def check_system_health(self):
        """Run the memory and Wi-Fi checks.

        Returns:
            False when free heap is below ``MIN_FREE_MEM`` or when more
            than ``MAX_CONNECTION_RETRIES`` consecutive network checks have
            failed; True otherwise. A network check that raises counts as
            a failed check.
        """
        # Memory check (best effort: gc.mem_alloc/mem_free are
        # MicroPython-specific and may be missing on other runtimes).
        try:
            import gc

            allocated = gc.mem_alloc()
            free = gc.mem_free()
            print(f"mem_alloc/mem_free={allocated}/{free}")
            if free < self.MIN_FREE_MEM:
                return False
        except Exception as e:
            print("内存检查失败:", e)

        # Network check.
        try:
            connected = network.WLAN(network.STA_IF).isconnected()
        except Exception as e:
            print("网络检查失败:", e)
            connected = False

        if connected:
            self.connection_retries = 0
            return True

        self.connection_retries += 1
        return self.connection_retries <= self.MAX_CONNECTION_RETRIES
# Module-level entry point: build the monitor once (this restores the
# persisted reboot hour and performs the NTP sync), then poll forever.
monitor = SystemMonitor()

while True:
    # No argument: reboot at every full hour. Pass an hour N to
    # scheduled_reboot() to reboot only at that hour.
    monitor.scheduled_reboot()
    # The health result is printed by the check itself; its return value
    # is not acted upon here.
    monitor.check_system_health()
    # Poll roughly once per second.
    time.sleep(1)

被折叠的 条评论
为什么被折叠?



