#!/usr/bin/env python3
import rospy
import os
import threading
import time
import subprocess
import signal
import shutil
import logging
from logging.handlers import RotatingFileHandler
from datetime import datetime, timedelta
from collections import deque
import std_msgs.msg
from std_msgs.msg import String, Int8
from bag_recorder.msg import Rosbag, vehicle_basic_info, Lane
class LocAbnormalRecorder:
def __init__(self):
# 初始化ROS节点
rospy.init_node('loc_abnormal_recorder', anonymous=True)
# 配置参数扩展
self.min_free_space_gb = rospy.get_param('~min_free_space_gb', 1.0)
self.clean_threshold_gb = rospy.get_param('~clean_threshold_gb', 50.0)
self.file_expire_days = rospy.get_param('~file_expire_days', 15)
self.clean_interval_hours = rospy.get_param('~clean_interval_hours', 1)
# 原有参数
self.jump_threshold = rospy.get_param('~jump_threshold', 5.0)
self.stable_duration = rospy.get_param('~stable_duration', 60.0)
# 录制目录配置
default_normal_dirs = {
0: "/home/ykxn/SSD1T/DATA_LOC/0_无定位",
1: "/home/ykxn/SSD1T/DATA_LOC/1_纯惯导",
2: "/home/ykxn/SSD1T/DATA_LOC/2_RTK",
3: "/home/ykxn/SSD1T/DATA_LOC/3_SLAM"
}
self.normal_rec_dir = rospy.get_param('~normal_rec_dirs', default_normal_dirs)
self.abnormal_rec_dir = rospy.get_param('~abnormal_rec_dir', "/home/ykxn/SSD1T/DATA_ABNORMAL")
self.log_file = rospy.get_param('~log_file', "/home/ykxn/SSD1T/abnormal_log.txt")
# 参数验证
self._validate_parameters()
# 状态变量
self.rec_flag = 0
self.spd_prev = 0.0
self.time_st = 0.0
self.st_enbl = 1
self.ed_enbl = 0
self.mode_prev = 1
self.loc_mode = 0
self.loc_mode_prev = 0
self.loc_mode_start_time = time.time()
self.abnormal_flag = 0
self.abnormal_rec_flag = 0
self.abnormal_type = ""
self.recent_loc_modes = deque(maxlen=5)
self.rec_processes = {}
self.abnormal_process = None
# 线程安全和状态管理
self.cleanup_lock = threading.Lock()
self.recording_lock = threading.Lock()
self._last_disk_check = 0
self._last_disk_result = True
# 初始化系统
self.init_rotating_log()
self.create_directories()
# 磁盘检查和定时清理
if not self.check_disk_space():
self.logger.error(f"磁盘可用空间不足{self.min_free_space_gb}GB,录制功能将受限!")
self.start_clean_timer()
self.start_process_monitor()
# ROS订阅和关机钩子
rospy.on_shutdown(self.shutdown_hook)
rospy.Subscriber("i2t_basic_info", vehicle_basic_info, self.vehicle_callback)
rospy.Subscriber("/localization_mode", Int8, self.localization_callback)
self.logger.info("定位异常录制节点初始化完成(优化版)")
def _validate_parameters(self):
"""验证参数合理性"""
if self.min_free_space_gb <= 0:
rospy.logwarn("最小空间阈值必须大于0,使用默认值1.0GB")
self.min_free_space_gb = 1.0
if self.clean_threshold_gb <= self.min_free_space_gb:
self.clean_threshold_gb = self.min_free_space_gb + 5.0
rospy.logwarn(f"清理阈值应大于最小空间阈值,自动调整为{self.clean_threshold_gb}GB")
if self.file_expire_days < 1:
rospy.logwarn("文件过期时间至少1天,使用默认值15天")
self.file_expire_days = 15
def create_directories(self):
"""创建所有必要的目录"""
try:
for dir_path in self.normal_rec_dir.values():
os.makedirs(dir_path, exist_ok=True)
# 检查目录可写性
if not os.access(dir_path, os.W_OK):
self.logger.error(f"目录不可写: {dir_path}")
os.makedirs(self.abnormal_rec_dir, exist_ok=True)
if not os.access(self.abnormal_rec_dir, os.W_OK):
self.logger.error(f"目录不可写: {self.abnormal_rec_dir}")
self.logger.info("所有录制目录已创建/确认")
except OSError as e:
self.logger.error(f"创建目录失败: {e}")
raise
def init_rotating_log(self):
"""初始化日志轮转:单个日志文件50MB,最多保留5个备份"""
self.logger = logging.getLogger('LocAbnormalRecorder')
self.logger.setLevel(logging.DEBUG)
if self.logger.handlers:
self.logger.handlers.clear()
# 文件日志(轮转)
file_handler = RotatingFileHandler(
self.log_file,
maxBytes=50 * 1024 * 1024,
backupCount=5,
encoding='utf-8'
)
file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)
file_handler.setLevel(logging.INFO)
# 终端日志
console_handler = logging.StreamHandler()
console_handler.setFormatter(file_formatter)
console_handler.setLevel(logging.DEBUG)
self.logger.addHandler(file_handler)
self.logger.addHandler(console_handler)
self.logger.info("定位异常录制节点启动(优化版)")
self.logger.info(f"配置参数:跳动阈值{self.jump_threshold}秒,稳定阈值{self.stable_duration}秒")
def check_disk_space(self, check_dir=None, force_check=False):
"""带缓存的磁盘空间检查"""
current_time = time.time()
# 缓存2秒,避免频繁检查(除非强制检查)
if not force_check and current_time - self._last_disk_check < 2.0:
return self._last_disk_result
check_dir = check_dir or self.abnormal_rec_dir
try:
disk_usage = shutil.disk_usage(check_dir)
free_space_gb = disk_usage.free / (1024 ** 3)
total_space_gb = disk_usage.total / (1024 ** 3)
self._last_disk_check = current_time
self._last_disk_result = free_space_gb >= self.min_free_space_gb
if not self._last_disk_result:
self.logger.error(f"磁盘空间不足!当前{free_space_gb:.1f}GB < 最小需求{self.min_free_space_gb}GB")
else:
self.logger.debug(f"磁盘状态:总空间{total_space_gb:.1f}GB,可用空间{free_space_gb:.1f}GB")
return self._last_disk_result
except OSError as e:
self.logger.error(f"检查磁盘空间失败:{e}")
return False
def clean_old_files(self):
"""清理旧文件:过期文件 + 空间不足时删除最旧文件"""
# 非阻塞获取锁,避免重复清理
if not self.cleanup_lock.acquire(blocking=False):
self.logger.debug("清理操作正在进行,跳过本次")
return
try:
clean_dirs = list(self.normal_rec_dir.values()) + [self.abnormal_rec_dir]
expire_time = datetime.now() - timedelta(days=self.file_expire_days)
total_expired_deleted = 0
total_space_deleted = 0
for dir_path in clean_dirs:
if not os.path.exists(dir_path):
continue
expired_deleted, space_deleted = self._clean_directory(dir_path, expire_time)
total_expired_deleted += expired_deleted
total_space_deleted += space_deleted
if total_expired_deleted > 0 or total_space_deleted > 0:
self.logger.info(f"清理完成:删除过期文件{total_expired_deleted}个,空间不足删除{total_space_deleted}个")
else:
self.logger.debug("无需清理:空间充足且无过期文件")
except Exception as e:
self.logger.error(f"清理过程出现异常: {e}")
finally:
self.cleanup_lock.release()
def _clean_directory(self, dir_path, expire_time):
"""清理单个目录"""
expired_deleted = 0
space_deleted = 0
# 收集所有bag文件
bag_files = []
for filename in os.listdir(dir_path):
file_path = os.path.join(dir_path, filename)
if os.path.isfile(file_path) and filename.endswith('.bag'):
file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
bag_files.append((file_mtime, file_path, filename))
# 步骤1:删除过期文件
for file_mtime, file_path, filename in bag_files[:]: # 使用副本遍历
if file_mtime < expire_time:
try:
file_size = os.path.getsize(file_path)
os.remove(file_path)
expired_deleted += 1
self.logger.info(f"删除过期文件:{filename}(修改时间:{file_mtime.strftime('%Y-%m-%d')})")
# 从列表中移除已删除文件
bag_files.remove((file_mtime, file_path, filename))
except OSError as e:
self.logger.error(f"删除过期文件{filename}失败:{e}")
# 步骤2:检查空间并删除旧文件
disk_usage = shutil.disk_usage(dir_path)
free_space_gb = disk_usage.free / (1024 ** 3)
if free_space_gb >= self.clean_threshold_gb:
return expired_deleted, space_deleted
# 按修改时间排序(最旧优先)
sorted_files = sorted(bag_files, key=lambda x: x[0])
for file_mtime, file_path, filename in sorted_files:
if free_space_gb >= self.clean_threshold_gb:
break
try:
file_size_gb = os.path.getsize(file_path) / (1024 ** 3)
os.remove(file_path)
space_deleted += 1
# 重新获取实际空间
disk_usage = shutil.disk_usage(dir_path)
free_space_gb = disk_usage.free / (1024 ** 3)
self.logger.warning(f"磁盘空间不足,删除旧文件:{filename}(释放{file_size_gb:.2f}GB,当前可用{free_space_gb:.1f}GB)")
except OSError as e:
self.logger.error(f"删除旧文件{filename}失败:{e}")
return expired_deleted, space_deleted
def start_clean_timer(self):
"""启动定时清理线程"""
def clean_loop():
while not rospy.is_shutdown():
try:
self.clean_old_files()
except Exception as e:
self.logger.error(f"定时清理异常: {e}")
time.sleep(self.clean_interval_hours * 3600)
clean_thread = threading.Thread(target=clean_loop, daemon=True)
clean_thread.start()
self.logger.info(f"定时清理线程已启动(每{self.clean_interval_hours}小时执行一次)")
def start_process_monitor(self):
"""启动进程监控定时器"""
def monitor_callback(event):
self.monitor_recording_processes()
# 每30秒检查一次进程状态
rospy.Timer(rospy.Duration(30), monitor_callback)
self.logger.info("进程监控定时器已启动")
def monitor_recording_processes(self):
"""监控录制进程状态"""
with self.recording_lock:
# 检查正常录制进程
for mode, process in list(self.rec_processes.items()):
if process and process.poll() is not None:
self.logger.warning(f"定位模式{mode}的录制进程意外退出,退出码: {process.returncode}")
self.rec_processes[mode] = None
# 检查异常录制进程
if self.abnormal_process and self.abnormal_process.poll() is not None:
self.logger.error(f"异常录制进程意外退出,退出码: {self.abnormal_process.returncode}")
self.abnormal_process = None
self.abnormal_rec_flag = 0
if self.abnormal_flag == 1:
self.abnormal_flag = 0
self.logger.warning("异常录制标志已重置")
def localization_callback(self, data):
"""定位模式回调"""
self.loc_mode = data.data
self.logger.debug(f"当前定位模式: {self.loc_mode}")
def vehicle_callback(self, data):
"""车辆信息回调"""
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
current_timestamp = time.time()
spd = data.speed
current_mode = data.mode
# 仅在"自动驾驶+速度>0"时处理
if current_mode == 1 and spd > 0.0:
# 磁盘空间不足时,停止所有录制并返回
if not self.check_disk_space():
self.stop_all_recording()
return
self.update_loc_mode_state(current_timestamp)
self.handle_normal_recording(current_time, current_timestamp)
self.check_abnormal_conditions(current_time, current_timestamp)
self.handle_abnormal_recovery(current_time, current_timestamp)
self.spd_prev = spd
self.mode_prev = current_mode
self.loc_mode_prev = self.loc_mode
def update_loc_mode_state(self, current_timestamp):
"""更新定位模式状态"""
if self.loc_mode != self.loc_mode_prev:
self.recent_loc_modes.append((self.loc_mode_prev, current_timestamp))
self.loc_mode_start_time = current_timestamp
self.logger.info(f"定位模式变化: {self.loc_mode_prev}→{self.loc_mode}")
def handle_normal_recording(self, current_time, current_timestamp):
"""处理正常录制"""
if self.loc_mode in self.normal_rec_dir.keys():
rec_dir = self.normal_rec_dir[self.loc_mode]
if self.loc_mode != self.loc_mode_prev:
self.stop_normal_recording(self.loc_mode_prev)
self.start_normal_recording(self.loc_mode, rec_dir, current_time)
def start_normal_recording(self, loc_mode, rec_dir, current_time):
"""启动正常录制"""
if not self.check_disk_space(rec_dir):
self.logger.error(f"磁盘空间不足,无法启动定位模式{loc_mode}的正常录制")
return
with self.recording_lock:
try:
# 停止现有录制
if loc_mode in self.rec_processes and self.rec_processes[loc_mode] is not None:
self._terminate_process(self.rec_processes[loc_mode], f"定位模式{loc_mode}的旧进程")
topics = [
"/final_waypoints", "/estimate_twist", "/ndt_pose",
"/localizer_pose", "/tf", "/detection/lidar_detector/objects",
"/i2t_basic_info", "/localization_mode"
]
cmd = [
'rosbag', 'record', '--split', '--duration=20',
'-o', f'loc_mode_{loc_mode}'
] + topics
process = subprocess.Popen(
cmd,
cwd=rec_dir,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
preexec_fn=os.setsid
)
self.rec_processes[loc_mode] = process
self.logger.info(f"{current_time} 【正常录制】定位模式{loc_mode}录制启动(目录:{rec_dir})")
except Exception as e:
self.logger.error(f"启动正常录制失败: {e}")
def stop_normal_recording(self, loc_mode):
"""停止正常录制"""
with self.recording_lock:
if loc_mode in self.rec_processes and self.rec_processes[loc_mode] is not None:
self._terminate_process(self.rec_processes[loc_mode], f"定位模式{loc_mode}的录制")
self.rec_processes[loc_mode] = None
def check_abnormal_conditions(self, current_time, current_timestamp):
"""检查异常条件"""
if self.abnormal_flag == 0:
current_mode_duration = current_timestamp - self.loc_mode_start_time
if (self.loc_mode_prev in [2, 3]) and (self.loc_mode in [0, 1]):
self.trigger_abnormal_recording(
current_time,
f"正常模式{self.loc_mode_prev}→{self.loc_mode}(无定位/纯惯导)"
)
elif (self.loc_mode_prev == 3) and (self.loc_mode == 2) and (current_mode_duration >= self.stable_duration):
self.trigger_abnormal_recording(
current_time,
f"SLAM(3)稳定{int(self.stable_duration)}秒→RTK(2)"
)
elif (self.loc_mode_prev == 2) and (self.loc_mode == 3) and (current_mode_duration >= self.stable_duration):
self.trigger_abnormal_recording(
current_time,
f"RTK(2)稳定{int(self.stable_duration)}秒→SLAM(3)"
)
elif self.check_mode_jump():
self.trigger_abnormal_recording(
current_time,
f"RTK(2)与SLAM(3)反复跳动(间隔≤{self.jump_threshold}秒)"
)
def check_mode_jump(self):
"""检查模式跳动"""
if len(self.recent_loc_modes) < 3:
return False
jump_count = 0
for i in range(1, len(self.recent_loc_modes)):
prev_mode, prev_time = self.recent_loc_modes[i-1]
curr_mode, curr_time = self.recent_loc_modes[i]
if prev_mode in [2, 3] and curr_mode in [2, 3] and prev_mode != curr_mode:
if curr_time - prev_time <= self.jump_threshold:
jump_count += 1
return jump_count >= 2
def trigger_abnormal_recording(self, current_time, abnormal_type):
"""触发异常录制"""
if not self.check_disk_space(self.abnormal_rec_dir):
self.logger.error(f"磁盘空间不足,无法启动异常录制(类型:{abnormal_type})")
return
with self.recording_lock:
try:
self.logger.info(f"[{current_time}] 【异常触发】{abnormal_type}")
if self.abnormal_rec_flag == 0:
safe_type = abnormal_type.replace("→", "to").replace("(", "_").replace(")", "").replace(" ", "_")
abnormal_filename = f"abnormal_{current_time.replace(' ', '_').replace(':', '-')}_{safe_type}"
topics = [
"/final_waypoints", "/estimate_twist", "/ndt_pose",
"/localizer_pose", "/tf", "/localization_mode",
"/imu/data", "/gps/data"
]
cmd = ['rosbag', 'record', '-o', abnormal_filename] + topics
self.abnormal_process = subprocess.Popen(
cmd,
cwd=self.abnormal_rec_dir,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
preexec_fn=os.setsid
)
self.abnormal_rec_flag = 1
self.abnormal_type = abnormal_type
self.abnormal_flag = 1
except Exception as e:
self.logger.error(f"触发异常录制失败: {e}")
def handle_abnormal_recovery(self, current_time, current_timestamp):
"""处理异常恢复"""
if self.abnormal_flag == 1:
current_mode_duration = current_timestamp - self.loc_mode_start_time
if (self.loc_mode in [2, 3]) and (current_mode_duration >= 10.0):
self.stop_abnormal_recording(current_time)
self.abnormal_flag = 0
self.abnormal_rec_flag = 0
self.abnormal_type = ""
def stop_abnormal_recording(self, current_time):
"""停止异常录制"""
with self.recording_lock:
if self.abnormal_process is not None:
self._terminate_process(self.abnormal_process, "异常录制")
self.abnormal_process = None
self.logger.info(f"{current_time} 【异常恢复】{self.abnormal_type},停止异常录制")
def _terminate_process(self, process, process_name):
"""安全终止进程"""
if process and process.poll() is None:
try:
# 尝试优雅终止
os.killpg(os.getpgid(process.pid), signal.SIGTERM)
process.wait(timeout=3.0)
self.logger.info(f"{process_name}已正常停止")
except subprocess.TimeoutExpired:
self.logger.warning(f"{process_name}未正常退出,强制终止")
try:
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
process.wait(timeout=2.0)
except Exception as e:
self.logger.error(f"强制终止{process_name}失败: {e}")
except Exception as e:
self.logger.error(f"停止{process_name}时出现错误: {e}")
def stop_all_recording(self):
"""停止所有录制"""
with self.recording_lock:
# 停止正常录制
for loc_mode in list(self.rec_processes.keys()):
self.stop_normal_recording(loc_mode)
# 停止异常录制
if self.abnormal_process is not None:
self._terminate_process(self.abnormal_process, "异常录制")
self.abnormal_process = None
self.abnormal_rec_flag = 0
self.abnormal_flag = 0
self.logger.warning("因磁盘空间不足,已停止所有录制")
def shutdown_hook(self):
"""关机钩子"""
self.logger.info("节点关闭,停止所有录制进程")
self.stop_all_recording()
self.logger.info("定位异常录制节点已安全关闭")
def run(self):
"""运行节点"""
self.logger.info("定位异常录制节点开始运行")
rospy.spin()
if __name__ == "__main__":
try:
recorder = LocAbnormalRecorder()
recorder.run()
except rospy.ROSInterruptException:
print("节点被手动停止")
except Exception as e:
print(f"节点运行出现未预期错误: {e}")