import cv2
import os
import time
import shutil
import threading
import queue
import re
import logging
import signal
import sys
from pathlib import Path
from collections import defaultdict
from ultralytics import YOLO
from logging.handlers import RotatingFileHandler
# Configuration parameters
class Config:
    """Static settings for the detection and auto-retraining pipeline."""

    # --- Model locations ---
    MODEL_PATH = r"F:\yolov8\ultralytics-main\best.pt"  # weights loaded at start-up
    NEW_MODEL_DIR = r"F:\yolov8\ultralytics-main\retrained_models"  # where retrained models are stored

    # --- Image locations ---
    SOURCE_DIR = r"F:\yolov8\ultralytics-main\datasets\bvn\images\train"  # folder polled for input images
    OUTPUT_DIR = r"F:\yolov8\ultralytics-main\datasets\bvn\images\done"  # root for detection results
    NO_DETECTION_DIR = os.path.join(OUTPUT_DIR, "no_detection")  # images with no detected target
    LOG_DIR = os.path.join(OUTPUT_DIR, "logs")  # per-class log files
    BACKUP_DIR = os.path.join(OUTPUT_DIR, "backup")  # originals are moved here after processing

    # --- Training parameters ---
    TRAINING_THRESHOLD = 600  # start training once this many new images are collected
    EPOCHS = 50  # number of training epochs
    IMGSZ = 640  # training image size
    BATCH = 8  # training batch size

    # --- Detection parameters ---
    CONFIDENCE_THRESHOLD = 0.5  # detection confidence threshold
    POLL_INTERVAL = 1  # folder polling interval (seconds)
    STABLE_TIME = 0.5  # minimum file age before it is processed (seconds)
    IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.bmp'}  # supported image extensions

    # --- Logging parameters ---
    MAX_LOG_SIZE = 10 * 1024 ** 2  # maximum size of a single log file (10MB)
    BACKUP_COUNT = 5  # number of rotated log files to keep
# Make sure every working directory exists before anything writes to it
for _required_dir in (
    Config.OUTPUT_DIR,
    Config.NO_DETECTION_DIR,
    Config.NEW_MODEL_DIR,
    Config.LOG_DIR,
    Config.BACKUP_DIR,
):
    Path(_required_dir).mkdir(parents=True, exist_ok=True)
del _required_dir  # do not leave the loop variable behind as a module global
# Global state shared by the functions below
training_queue = queue.Queue() # image paths waiting to be copied into the training set
new_images_count = 0 # images processed since the counter was last reset
model_lock = threading.Lock() # held while training and while swapping in a new model
log_lock = threading.Lock() # guards the log manager's logger table and entry counters
exit_flag = threading.Event() # set by the SIGINT handler to request shutdown
# Load the initial model
model = YOLO(Config.MODEL_PATH)
# Log manager (entries are numbered with Arabic numerals)
class SequentialLogManager:
    """Maintain one rotating log file per detection class with numbered entries.

    Each class name gets its own ``detection.<cls_name>`` logger that writes to
    ``<log_dir>/<cls_name>.log``. Entries are numbered per class starting at 1.
    The shared tables (``loggers`` and ``entry_counts``) are guarded by the
    module-level ``log_lock``.
    """

    # Matches names such as "Err2025-06-28 040014922L14397382_0_result":
    # the date, a 9-digit HHMMSSfff time, and the number that follows "L".
    # Compiled once instead of on every call.
    _FILENAME_PATTERN = re.compile(r'Err(\d{4}-\d{2}-\d{2}) (\d{9})L(\d+)')

    def __init__(self, log_dir):
        """Remember the log directory and attach the console handler.

        Args:
            log_dir: Directory in which the per-class log files are created.
        """
        self.log_dir = Path(log_dir)
        self.loggers = {}
        self.entry_counts = defaultdict(int)  # number of entries per class
        self.setup_logging()

    def setup_logging(self):
        """Configure the root logger: INFO level plus a console handler."""
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        root_logger.addHandler(console_handler)

    def get_logger(self, cls_name):
        """Return the logger for ``cls_name``, creating it on first use.

        On creation a rotating file handler is attached, a header is written
        and the entry counter for the class is reset to 0.
        """
        with log_lock:
            if cls_name not in self.loggers:
                logger = logging.getLogger(f"detection.{cls_name}")
                logger.propagate = False  # keep per-class entries off the root/console logger
                log_file = self.log_dir / f"{cls_name}.log"
                # Entries contain non-ASCII text, so pin the encoding instead of
                # depending on the platform's locale default.
                file_handler = RotatingFileHandler(
                    log_file,
                    maxBytes=Config.MAX_LOG_SIZE,
                    backupCount=Config.BACKUP_COUNT,
                    encoding="utf-8",
                )
                file_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
                logger.addHandler(file_handler)
                # Header block
                logger.info(f"{'=' * 60}")
                logger.info(f"Detection Log - Class: {cls_name}")
                logger.info(f"Log Start Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
                logger.info(f"{'=' * 60}")
                self.entry_counts[cls_name] = 0
                self.loggers[cls_name] = logger
            return self.loggers[cls_name]

    def parse_filename(self, filename):
        """Extract date, time and length from an image file name.

        Args:
            filename: Name such as "Err2025-06-28 040014922L14397382_0_result".

        Returns:
            ``(date, "HH:MM:SS.fff", length)`` where ``length`` is the number
            after "L" divided by 1000 and rounded to 3 decimals (millimetres to
            metres according to the original comments), or
            ``(None, None, None)`` when the name does not match.
        """
        match = self._FILENAME_PATTERN.search(filename)
        if match is None:
            return None, None, None
        date, time_str, raw_length = match.groups()
        formatted_time = f"{time_str[0:2]}:{time_str[2:4]}:{time_str[4:6]}.{time_str[6:9]}"
        length_m = round(int(raw_length) / 1000, 3)
        return date, formatted_time, length_m

    def log_detection(self, cls_name, img_name):
        """Append one numbered entry for ``img_name`` to the log of ``cls_name``.

        Names that cannot be parsed are reported with a warning on the root
        logger and are not counted.
        """
        date, time_str, length_m = self.parse_filename(img_name)
        if date is None or time_str is None or length_m is None:
            logging.warning(f"Cannot parse filename: {img_name}")
            return
        logger = self.get_logger(cls_name)
        # Increment and read the counter atomically so numbers are not reused.
        with log_lock:
            self.entry_counts[cls_name] += 1
            entry_num = self.entry_counts[cls_name]
        log_entry = f"{entry_num}. 日期: {date}, 时间: {time_str}, 长度: {length_m} 原编码: {img_name}"
        logger.info(log_entry)

    def close_all(self):
        """Write a footer to every class log, close the files and reset state.

        After this call ``loggers`` and ``entry_counts`` are empty, so callers
        that need the totals must read them before closing.
        """
        # Hold the lock like every other method that touches the shared tables.
        with log_lock:
            for cls_name, logger in self.loggers.items():
                # Iterate over a copy because handlers are detached below.
                for handler in list(logger.handlers):
                    if isinstance(handler, RotatingFileHandler):
                        total_entries = self.entry_counts[cls_name]
                        logger.info(f"{'=' * 60}")
                        logger.info(f"Detection Class: {cls_name}")
                        logger.info(f"Total Entries: {total_entries}")
                        logger.info(f"Log End Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
                        logger.info(f"{'=' * 60}")
                        handler.close()
                # Detach every handler so a later get_logger() starts clean.
                for handler in list(logger.handlers):
                    logger.removeHandler(handler)
            self.entry_counts.clear()
            self.loggers.clear()

    def get_entry_counts(self):
        """Return a snapshot ``{class name: entry count}`` of the counters."""
        with log_lock:
            return dict(self.entry_counts)
# Create the global log manager
log_manager = SequentialLogManager(Config.LOG_DIR)
def signal_handler(sig, frame):
    """Handle Ctrl+C (SIGINT) by requesting a graceful shutdown."""
    print("\nCtrl+C pressed. Exiting gracefully...")
    exit_flag.set()  # set the exit flag that the processing loops check
# Register the SIGINT handler
signal.signal(signal.SIGINT, signal_handler)
def backup_original_image(image_path):
    """Move the original image into the backup directory.

    If a file with the same name already exists in the backup directory, a
    timestamp is appended to the new file's stem.

    Returns:
        The destination ``Path``, or ``None`` when the move failed (the error
        is logged).
    """
    try:
        backup_root = Path(Config.BACKUP_DIR)
        backup_root.mkdir(parents=True, exist_ok=True)  # make sure the target exists

        destination = backup_root / Path(image_path).name
        if destination.exists():
            stamp = time.strftime("%Y%m%d-%H%M%S")
            destination = destination.with_name(
                f"{destination.stem}_{stamp}{destination.suffix}"
            )

        shutil.move(image_path, destination)
    except Exception as e:
        logging.error(f"Failed to backup original image: {image_path} - {str(e)}")
        return None
    return destination
def process_image(image_path):
    """Run detection on one image, save the outputs and queue it for training.

    For every detected class the plotted result is written to
    ``<OUTPUT_DIR>/<class>/<stem>_result<suffix>`` and a log entry is added.
    When nothing is detected, a copy stamped with "No Detection" goes to the
    no-detection folder. The original is then moved to the backup folder and
    its path is put on the training queue.

    Returns:
        A mapping ``{class name: [saved paths]}`` (key ``"no_detection"`` when
        nothing was found), or ``{}`` if shutdown was requested or the image
        could not be read.
    """
    global new_images_count

    if exit_flag.is_set():
        return {}

    original_img = cv2.imread(image_path)
    if original_img is None:
        logging.error(f"Failed to read image: {image_path}")
        return {}

    source = Path(image_path)
    img_name = source.stem  # used for the log entries

    results = model(image_path, conf=Config.CONFIDENCE_THRESHOLD)

    detection_count = 0
    class_files = defaultdict(list)  # saved file paths grouped by class

    for result in results:
        if exit_flag.is_set():
            return {}

        boxes = result.boxes
        box_total = 0 if boxes is None else len(boxes)
        detection_count += box_total
        if box_total == 0:
            continue

        plotted_img = result.plot()
        # One output per distinct class, regardless of how many boxes it has
        for cls_idx in set(boxes.cls.int().tolist()):
            cls_name = model.names[cls_idx]
            cls_dir = Path(Config.OUTPUT_DIR) / cls_name
            cls_dir.mkdir(parents=True, exist_ok=True)

            save_path = str(cls_dir / f"{source.stem}_result{source.suffix}")
            cv2.imwrite(save_path, plotted_img)
            class_files[cls_name].append(save_path)

            log_manager.log_detection(cls_name, img_name)

    if detection_count == 0:
        # Stamp a centred red "No Detection" label on a copy of the image
        annotated_img = original_img.copy()
        text = "No Detection"
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        thickness = 2
        color = (0, 0, 255)

        text_w, text_h = cv2.getTextSize(text, font, font_scale, thickness)[0]
        img_h, img_w = annotated_img.shape[0], annotated_img.shape[1]
        origin = ((img_w - text_w) // 2, (img_h + text_h) // 2)
        cv2.putText(annotated_img, text, origin, font, font_scale, color, thickness)

        save_path = str(Path(Config.NO_DETECTION_DIR) / source.name)
        cv2.imwrite(save_path, annotated_img)
        class_files["no_detection"] = [save_path]

        log_manager.log_detection("no_detection", img_name)

    # Move the original out of the watched folder
    backup_path = backup_original_image(image_path)
    if not backup_path:
        logging.warning(f"Failed to backup original image: {image_path}")
    else:
        logging.info(f"Original image backed up to: {backup_path}")

    # Queue the image for training; fall back to the source path if the backup failed
    training_queue.put(backup_path or image_path)
    new_images_count += 1

    return class_files
def auto_retrain_model():
    """Build a dataset from the queued images, train a model and swap it in.

    Steps:
      1. Drain ``training_queue``: copy each image into
         ``<NEW_MODEL_DIR>/auto_dataset/images/train`` and create an empty
         label file for it if none exists yet.
      2. Write ``dataset.yaml`` using the class names of the current model.
      3. Train a model built from ``yolov8n.yaml`` while holding ``model_lock``.
      4. If ``best.pt`` was produced, back up ``Config.MODEL_PATH``, overwrite
         it with the new weights and reload the global ``model``.

    Returns:
        ``True`` when the model was replaced; ``False`` when training was
        cancelled, interrupted, or produced no ``best.pt``.

    NOTE(review): label files are created empty, so unless label files already
    exist in the labels directory the training set carries no annotations.
    Training also starts from the ``yolov8n.yaml`` architecture rather than
    from the current weights, and validation reuses the training images.
    Confirm this is intended before relying on the replaced model.
    """
    global model, new_images_count

    if exit_flag.is_set():
        logging.warning("Training canceled due to exit signal")
        return False

    # Dataset layout
    dataset_dir = os.path.join(Config.NEW_MODEL_DIR, "auto_dataset")
    images_dir = os.path.join(dataset_dir, "images", "train")
    labels_dir = os.path.join(dataset_dir, "labels", "train")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    logging.info(f"Preparing training data, collected {new_images_count} new images")

    # Drain the training queue into the dataset
    processed_count = 0
    while not training_queue.empty() and not exit_flag.is_set():
        try:
            img_path = training_queue.get_nowait()
        except queue.Empty:
            break

        base_name = Path(img_path).name
        try:
            shutil.copy(img_path, os.path.join(images_dir, base_name))
        except OSError as e:
            # A queued file may have been moved or deleted in the meantime;
            # skip it instead of aborting the whole retraining run.
            logging.error(f"Failed to copy training image: {img_path} - {e}")
            continue

        # Create an empty label file only when none exists, so existing labels are kept
        label_path = os.path.join(labels_dir, f"{Path(img_path).stem}.txt")
        if not os.path.exists(label_path):
            Path(label_path).touch()

        processed_count += 1
        if processed_count % 10 == 0:
            logging.info(f"Processed {processed_count}/{new_images_count} images")

    if exit_flag.is_set():
        logging.warning("Training canceled during dataset preparation")
        return False

    # Dataset configuration file; UTF-8 so non-ASCII class names survive
    # regardless of the platform's locale encoding.
    dataset_yaml_path = os.path.join(dataset_dir, "dataset.yaml")
    with open(dataset_yaml_path, 'w', encoding='utf-8') as f:
        f.write(f"path: {dataset_dir}\n")
        f.write("train: images/train\n")
        f.write("val: images/train # Using same data for validation\n")
        f.write("names:\n")
        for idx, name in model.names.items():
            f.write(f" {idx}: {name}\n")
    logging.info(f"Training dataset prepared: {dataset_dir}")
    logging.info(f"Starting model training (epochs={Config.EPOCHS}, batch={Config.BATCH})")

    # Train the new model
    try:
        with model_lock:  # keep the model locked for the duration of training
            if exit_flag.is_set():
                logging.warning("Training canceled before starting")
                return False
            new_model = YOLO("yolov8n.yaml")  # YOLOv8n architecture
            new_model.train(
                data=dataset_yaml_path,
                epochs=Config.EPOCHS,
                imgsz=Config.IMGSZ,
                batch=Config.BATCH,
                project=Config.NEW_MODEL_DIR,
                name="auto_retrain",
                exist_ok=True
            )
    except KeyboardInterrupt:
        logging.warning("Training interrupted by user")
        return False

    # Locate the best weights produced by the run
    best_model_path = os.path.join(Config.NEW_MODEL_DIR, "auto_retrain", "weights", "best.pt")
    if not os.path.exists(best_model_path):
        logging.error("Training failed, best model not found!!!")
        return False

    # Back up the old weights, then overwrite them with the new ones
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    backup_path = f"{Config.MODEL_PATH}.bak.{timestamp}"
    shutil.copy(Config.MODEL_PATH, backup_path)
    shutil.copy(best_model_path, Config.MODEL_PATH)

    # Reload the global model from the replaced file
    with model_lock:
        model = YOLO(Config.MODEL_PATH)
    logging.info(f"Model successfully replaced: {Config.MODEL_PATH}")
    logging.info(f"Old model backed up to: {backup_path}")

    # Report the per-class entry counts
    entry_counts = log_manager.get_entry_counts()
    logging.info("Class detection statistics:")
    for cls, count in entry_counts.items():
        logging.info(f" {cls}: {count} detections")
    return True
def monitor_folder():
    """Poll the source folder, process new images and trigger auto-training.

    Runs until ``exit_flag`` is set or an unexpected error occurs. On every
    pass it first starts retraining if ``new_images_count`` has reached
    ``Config.TRAINING_THRESHOLD``, then processes each supported image file
    whose modification time is at least ``Config.STABLE_TIME`` seconds old.
    On exit it closes the class logs and reports the per-class totals.
    """
    global new_images_count
    logging.info(f"Monitoring folder: {Config.SOURCE_DIR}")
    logging.info(f"Confidence threshold: {Config.CONFIDENCE_THRESHOLD}")
    logging.info(f"Undetected images will be saved to: {Config.NO_DETECTION_DIR}")
    logging.info(f"Original images will be backed up to: {Config.BACKUP_DIR}")
    logging.info(f"Auto-training will start after collecting {Config.TRAINING_THRESHOLD} new images")
    logging.info(f"Log files stored in: {Config.LOG_DIR}")
    logging.info("Press Ctrl+C to exit gracefully...")
    try:
        while not exit_flag.is_set():
            # Start training once enough new images have been collected
            if new_images_count >= Config.TRAINING_THRESHOLD and not exit_flag.is_set():
                logging.warning("Reached training threshold, starting auto-training process...")
                if auto_retrain_model():
                    new_images_count = 0
                    # Drop anything still queued
                    while not training_queue.empty() and not exit_flag.is_set():
                        training_queue.get_nowait()

            # Scan the source folder
            for entry in Path(Config.SOURCE_DIR).iterdir():
                if exit_flag.is_set():
                    break
                if not entry.is_file():
                    continue
                file_path = str(entry.resolve())
                file_ext = entry.suffix.lower()
                if file_ext not in Config.IMAGE_EXTS:
                    continue

                # Skip files modified too recently (they may still be being written)
                try:
                    file_age = time.time() - entry.stat().st_mtime
                except OSError:
                    # The file disappeared between listing and stat; one
                    # vanished file must not stop the monitor.
                    continue
                if file_age < Config.STABLE_TIME:
                    continue

                logging.info(f"Processing new image: {entry.name}")
                result_files = process_image(file_path)

                if "no_detection" in result_files:
                    logging.info(f"No targets detected: {result_files['no_detection'][0]}")
                else:
                    for cls_name, paths in result_files.items():
                        logging.info(f"Detected {cls_name}: {len(paths)} images")
                logging.info(f"Current new images count: {new_images_count}/{Config.TRAINING_THRESHOLD}")

            if exit_flag.is_set():
                break
            time.sleep(Config.POLL_INTERVAL)
    except Exception as e:
        # Top-level boundary: keep the traceback so the failure can be diagnosed
        logging.exception(f"Unexpected error: {str(e)}")
    finally:
        # Offer a final training run when the loop ended without an exit request
        if new_images_count > 0 and not exit_flag.is_set():
            logging.warning(f"Detected {new_images_count} unprocessed images. Train new model? (y/n)")
            if input().lower() == 'y':
                auto_retrain_model()

        # Snapshot the totals BEFORE closing: close_all() clears the counters,
        # so reading them afterwards always yields an empty result.
        entry_counts = log_manager.get_entry_counts()

        log_manager.close_all()
        logging.info("Log files closed with final statistics")

        if entry_counts:
            logging.info("Final class detection statistics:")
            for cls, count in entry_counts.items():
                logging.info(f" {cls}: {count} detections")
        else:
            logging.info("No detection records")
        logging.info("Program exited gracefully")
if __name__ == "__main__":
    # Run the monitor loop until it returns, then exit with status 0
    monitor_folder()
    sys.exit(0)