import os
import sys
import cv2
import torch
import yaml
import numpy as np
from pathlib import Path
from ultralytics import YOLO
from datetime import datetime
import random
import albumentations as A
from sklearn.preprocessing import minmax_scale
# Enhanced halo simulation (adds per-channel colour variation).
def simulate_halo(img, center, radius, intensity_range=(180, 255)):
    """Overlay one randomly chosen synthetic halo (lens glare) on ``img``.

    One of four styles is picked at random:

    * ``gaussian``  - a blurred disc added to all three channels;
    * ``radial``    - concentric rings whose brightness fades outwards;
    * ``composite`` - a radial halo followed by a tighter, brighter
      gaussian core;
    * ``chromatic`` - three blurred single-channel discs whose centres are
      slightly shifted against each other.

    Args:
        img: Image array with at least three channels on the last axis;
            results are written back as ``uint8``.
        center: ``(x, y)`` pixel position of the halo centre.
        radius: Radius in pixels of the object the halo surrounds; the halo
            itself is drawn larger than this.
        intensity_range: Inclusive ``(low, high)`` bounds from which the
            halo brightness is drawn (unused by the chromatic style).

    Returns:
        The image with the halo applied. The gaussian, radial and composite
        styles modify ``img`` in place and return it; the chromatic style
        returns a new array. Callers should always use the return value.
    """
    # OpenCV drawing calls expect plain Python ints for point coordinates.
    center = (int(center[0]), int(center[1]))
    halo_type = random.choice(["gaussian", "radial", "composite", "chromatic"])

    if halo_type == "chromatic":
        return _chromatic_halo(img, center, radius)
    if halo_type == "gaussian":
        return _gaussian_halo(img, center, radius, intensity_range)
    if halo_type == "radial":
        return _radial_halo(img, center, radius, intensity_range)

    # Composite: radial gradient first, then a brighter gaussian core.
    # The core's lower bound is clamped so it never exceeds the upper bound
    # (random.randint raises ValueError on an empty range).
    img = _radial_halo(img, center, radius, intensity_range)
    low, high = intensity_range
    core_range = (min(low + 50, high), high)
    return _gaussian_halo(img, center, int(radius * 0.5), core_range)


def _chromatic_halo(img, center, radius):
    """Return a copy of ``img`` blended with three colour-shifted blurred discs."""
    halo_radius = int(radius * random.uniform(1.5, 3.0))
    offset_x = random.randint(-5, 5)
    offset_y = random.randint(-5, 5)
    blur_size = 2 * int(halo_radius * 0.8) + 1  # kernel size must be odd

    halo_img = np.zeros_like(img)
    # One pure-channel disc per channel; which physical colour each tuple
    # maps to depends on the channel order of ``img``.
    for c_idx, color in enumerate([(255, 0, 0), (0, 255, 0), (0, 0, 255)]):
        channel_img = np.zeros_like(img)
        # Channels are displaced by -offset, 0 and +offset respectively.
        offset_center = (
            center[0] + offset_x * (c_idx - 1),
            center[1] + offset_y * (c_idx - 1),
        )
        cv2.circle(channel_img, offset_center, halo_radius, color, -1)
        channel_img = cv2.GaussianBlur(
            channel_img, (blur_size, blur_size), halo_radius * 0.5
        )
        halo_img = cv2.add(halo_img, channel_img)

    alpha = random.uniform(0.3, 0.7)
    return cv2.addWeighted(img, 1, halo_img, alpha, 0)


def _gaussian_halo(img, center, radius, intensity_range):
    """Add a blurred bright disc to the first three channels of ``img`` in place."""
    halo_intensity = random.randint(*intensity_range)
    halo_radius = int(radius * random.uniform(1.5, 3.0))

    mask = np.zeros(img.shape[:2], dtype=np.float32)
    cv2.circle(mask, center, halo_radius, 1.0, -1)
    blur_size = 2 * int(halo_radius * 0.8) + 1  # kernel size must be odd
    mask = cv2.GaussianBlur(mask, (blur_size, blur_size), halo_radius * 0.5)

    peak = float(mask.max())
    if peak <= 0.0:
        # Nothing was drawn inside the image; avoid dividing by zero.
        return img

    glow = (mask / peak) * halo_intensity
    img[..., :3] = np.clip(img[..., :3] + glow[..., None], 0, 255).astype(np.uint8)
    return img


def _radial_halo(img, center, radius, intensity_range):
    """Brighten ``img`` in place with concentric rings that fade outwards."""
    halo_intensity = random.randint(*intensity_range)
    inner_radius = int(radius * random.uniform(0.8, 1.2))
    outer_radius = int(radius * random.uniform(2.0, 4.0))
    # A zero radius collapses both bounds to 0; keep the divisor positive.
    span = max(outer_radius - inner_radius, 1)

    rings = np.zeros_like(img)
    for r in range(inner_radius, outer_radius + 1):
        alpha = 1.0 - (r - inner_radius) / span
        intensity = int(halo_intensity * alpha)
        cv2.circle(rings, center, r, (intensity, intensity, intensity), 1)

    # Only ever brighten: faint outer rings must not darken the background.
    np.maximum(img, rings, out=img)
    return img
# Improved negative-sample generation.
def add_negative_samples(img, label_positions, num_negatives=5):
    """Draw random non-target shapes on ``img`` away from the labelled targets.

    Candidate positions are sampled at least 20 px from the image border.
    A candidate is rejected when its distance to any target is smaller than
    three target radii plus the shape's own reach, so that a large shape
    cannot cover a target that stays labelled. At most
    ``3 * num_negatives`` candidates are tried.

    Args:
        img: Image array that is drawn on in place.
        label_positions: Iterable of ``(cx, cy, r)`` target circles to avoid.
        num_negatives: Maximum number of shapes to draw.

    Returns:
        ``(img, negative_shapes)`` where ``negative_shapes`` is a list of
        ``(shape_type, x, y, size)`` tuples; for ``"line"`` the last element
        is the line length. The list is empty when nothing could be placed,
        including when the image is too small for the 20 px border margin.
    """
    height, width = img.shape[:2]
    margin = 20
    if num_negatives <= 0 or width < 2 * margin or height < 2 * margin:
        return img, []

    shape_types = ["circle", "square", "triangle", "star", "line"]
    # Farthest drawn pixel from the anchor point, as a multiple of ``size``.
    reach_factor = {
        "circle": 1.0,
        "square": 1.0,
        "triangle": 1.5,  # base corners lie sqrt(2) * size from the anchor
        "star": 1.0,
        "line": 1.0,
    }

    negative_shapes = []
    attempts = 0
    while len(negative_shapes) < num_negatives and attempts < num_negatives * 3:
        attempts += 1
        x = random.randint(margin, width - margin)
        y = random.randint(margin, height - margin)
        shape_type = random.choice(shape_types)
        if shape_type == "line":
            size = random.randint(10, 30)
        else:
            size = random.randint(3, 15)
        reach = size * reach_factor[shape_type]

        too_close = any(
            np.hypot(x - cx, y - cy) < r * 3 + reach
            for cx, cy, r in label_positions
        )
        if not too_close:
            negative_shapes.append((shape_type, x, y, size))

    for shape_type, x, y, size in negative_shapes:
        color = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )
        _draw_negative_shape(img, shape_type, x, y, size, color)

    return img, negative_shapes


def _draw_negative_shape(img, shape_type, x, y, size, color):
    """Draw one filled distractor shape of the given type anchored at ``(x, y)``."""
    if shape_type == "circle":
        cv2.circle(img, (x, y), size, color, -1)
    elif shape_type == "square":
        half = size // 2
        cv2.rectangle(img, (x - half, y - half), (x + half, y + half), color, -1)
    elif shape_type == "triangle":
        pts = np.array(
            [[x, y - size], [x - size, y + size], [x + size, y + size]],
            dtype=np.int32,
        )
        cv2.fillPoly(img, [pts], color)
    elif shape_type == "star":
        # Simple five-pointed star: alternate outer and half-radius points.
        pts = []
        for i in range(5):
            angle = np.pi / 2 + i * 2 * np.pi / 5
            pts.append((int(x + size * np.cos(angle)),
                        int(y + size * np.sin(angle))))
            pts.append((int(x + size / 2 * np.cos(angle + np.pi / 5)),
                        int(y + size / 2 * np.sin(angle + np.pi / 5))))
        cv2.fillPoly(img, [np.array(pts, dtype=np.int32)], color)
    elif shape_type == "line":
        angle = random.uniform(0, 2 * np.pi)
        end_x = int(x + size * np.cos(angle))
        end_y = int(y + size * np.sin(angle))
        thickness = random.randint(1, 3)
        cv2.line(img, (x, y), (end_x, end_y), color, thickness)
# Improved dataset generation (negative samples and brightness variation).
def create_robust_dataset(output_dir="black_dot_dataset", halo_prob=0.7,
                          num_images=1000, img_size=640):
    """Generate a synthetic black-dot detection dataset.

    Every image gets a random grey background, 3-14 black dots (each with
    an optional halo), optional line/spot distractors, optional halos
    without a dot, and random negative shapes. Only the dots are labelled.
    Dots are drawn last so that no later halo or distractor can hide a
    labelled target.

    Images are written to ``<output_dir>/images/train/img_NNNN.jpg`` and
    labels to ``<output_dir>/labels/train/img_NNNN.txt``, one
    ``0 x_center y_center width height`` line per dot with all values
    normalised by the image size.

    Args:
        output_dir: Dataset root directory; created when missing.
        halo_prob: Probability that a dot gets a halo. Halos without a dot
            are added to an image with probability ``halo_prob * 0.3``.
        num_images: Number of images to generate.
        img_size: Side length in pixels of the square images.

    Returns:
        The resolved dataset root as a ``Path``.

    Raises:
        ValueError: If ``img_size`` is too small for the 20 px border margin.
        OSError: If an image file cannot be written.
    """
    if img_size <= 40:
        raise ValueError(f"img_size 必须大于 40, 当前为 {img_size}")

    output_path = Path(output_dir)
    (output_path/"images/train").mkdir(parents=True, exist_ok=True)
    (output_path/"labels/train").mkdir(parents=True, exist_ok=True)
    print(f"🔧 创建数据集于: {output_path.resolve()}")
    print(f" 光圈出现概率: {halo_prob*100}%")

    # Candidate background grey-level ranges.
    bg_ranges = [
        (60, 100),   # standard
        (100, 140),  # bright
        (30, 60),    # dark
        (50, 90),    # medium
        (120, 170),  # very bright (simulated over-exposure)
    ]
    # Candidate halo intensity ranges.
    halo_intensities = [
        (180, 220),  # medium
        (220, 255),  # strong
        (150, 190),  # weak
        (200, 255),  # very strong
    ]
    modes = ["uniform", "gradient", "noisy", "textured"]

    for i in range(num_images):
        bg_mode = random.choice(modes)
        bg_min, bg_max = random.choice(bg_ranges)
        img = _make_background(bg_mode, bg_min, bg_max, img_size)

        # Global brightness jitter for extra variety.
        brightness = random.uniform(0.9, 1.1)
        img = (img * brightness).clip(0, 255).astype(np.uint8)

        # Choose the dots and apply their halos; the dots themselves are
        # drawn at the very end.
        dot_positions = []
        for _ in range(np.random.randint(3, 15)):
            x = int(np.random.randint(20, img_size - 20))
            y = int(np.random.randint(20, img_size - 20))
            radius = int(np.random.randint(1, 5))  # 1-4 px (upper bound exclusive)
            dot_positions.append((x, y, radius))
            if random.random() < halo_prob:
                img = simulate_halo(img, (x, y), radius,
                                    random.choice(halo_intensities))

        # Line/spot distractors in 70% of the images.
        if random.random() > 0.3:
            _add_distractors(img, dot_positions, bg_min, bg_max, img_size)

        # Halos that do not belong to any dot.
        if random.random() < halo_prob * 0.3:
            for _ in range(random.randint(1, 3)):
                halo_x = int(np.random.randint(20, img_size - 20))
                halo_y = int(np.random.randint(20, img_size - 20))
                halo_radius = random.randint(2, 8)
                img = simulate_halo(img, (halo_x, halo_y), halo_radius,
                                    random.choice(halo_intensities))

        # Unlabelled negative shapes.
        img, _ = add_negative_samples(img, dot_positions, random.randint(0, 8))

        # Targets go on top of everything else.
        label_lines = []
        for x, y, radius in dot_positions:
            cv2.circle(img, (x, y), radius, (0, 0, 0), -1)
            label_lines.append(
                f"0 {x/img_size:.6f} {y/img_size:.6f} "
                f"{2*radius/img_size:.6f} {2*radius/img_size:.6f}"
            )

        img_path = output_path/f"images/train/img_{i:04d}.jpg"
        if not cv2.imwrite(str(img_path), img):
            raise OSError(f"无法写入图像: {img_path}")
        label_path = output_path/f"labels/train/img_{i:04d}.txt"
        label_path.write_text("\n".join(label_lines), encoding="utf-8")

    print(f"✅ 数据集创建完成: {len(list((output_path/'images/train').glob('*.jpg')))} 张图像")
    return output_path.resolve()


def _make_background(bg_mode, bg_min, bg_max, size):
    """Return a ``size`` x ``size`` 3-channel uint8 grey background of the given mode."""
    shape = (size, size, 3)

    if bg_mode == "gradient":
        start_gray = np.random.randint(bg_min, bg_max)
        end_gray = np.random.randint(bg_min, bg_max)
        # One grey level per row, interpolated from top to bottom.
        rows = start_gray + (end_gray - start_gray) * np.arange(size) / size
        return np.broadcast_to(rows.astype(np.uint8)[:, None, None], shape).copy()

    base_gray = int(np.random.randint(bg_min, bg_max))

    if bg_mode == "noisy":
        # Add the signed noise in floating point and clip afterwards;
        # casting it to uint8 first would wrap negative values to ~255.
        noise = np.random.normal(0, 15, shape)
        return np.clip(base_gray + noise, 0, 255).astype(np.uint8)

    img = np.full(shape, base_gray, dtype=np.uint8)
    if bg_mode == "textured":
        # Overlapping blobs of slightly different grey imitate an uneven surface.
        for _ in range(50):
            x = int(np.random.randint(0, size))
            y = int(np.random.randint(0, size))
            blob_radius = int(np.random.randint(10, 100))
            gray = int(np.clip(base_gray + np.random.randint(-20, 20), 0, 255))
            cv2.circle(img, (x, y), blob_radius, (gray, gray, gray), -1)
    return img


def _add_distractors(img, dot_positions, bg_min, bg_max, size):
    """Draw 1-5 random grey lines or small spots on ``img``, keeping clear of the dots."""
    gray_low = max(0, bg_min - 20)
    gray_high = min(255, bg_max + 20)

    for _ in range(random.randint(1, 5)):
        if random.random() > 0.5:
            # Line: retry the end point up to 10 times to keep it off the dots.
            gray = int(np.random.randint(gray_low, gray_high))
            pt1 = (int(np.random.randint(0, size)), int(np.random.randint(0, size)))
            for _attempt in range(10):
                pt2 = (int(np.random.randint(0, size)),
                       int(np.random.randint(0, size)))
                if all(np.hypot(pt2[0] - dx, pt2[1] - dy) >= dr + 10
                       for dx, dy, dr in dot_positions):
                    break
            cv2.line(img, pt1, pt2, (gray, gray, gray), random.randint(1, 2))
        else:
            # Spot: drawn only if a non-overlapping place is found in 10 tries.
            for _attempt in range(10):
                spot_x = int(np.random.randint(0, size))
                spot_y = int(np.random.randint(0, size))
                spot_radius = int(np.random.randint(1, 3))
                if all(np.hypot(spot_x - dx, spot_y - dy) >= dr + spot_radius + 5
                       for dx, dy, dr in dot_positions):
                    gray = int(np.random.randint(gray_low, gray_high))
                    cv2.circle(img, (spot_x, spot_y), spot_radius,
                               (gray, gray, gray), -1)
                    break
# Create the dataset YAML configuration.
def create_dataset_config(data_dir):
    """Write the dataset YAML next to the dataset directory and return its path.

    The dataset root is resolved to an absolute path before it is written,
    so the configuration does not depend on the current working directory.

    Args:
        data_dir: Dataset root; must contain ``images/train``.

    Returns:
        Path of the written ``black_dot_config.yaml``, located in the parent
        directory of ``data_dir``.

    Raises:
        FileNotFoundError: If ``<data_dir>/images/train`` does not exist.
    """
    data_path = Path(data_dir).resolve()

    if not (data_path/"images/train").exists():
        raise FileNotFoundError(f"数据集结构错误: 缺少 {data_path/'images/train'}")

    config = {
        'path': str(data_path),
        'train': 'images/train',
        # Validation reuses the training images, so validation metrics
        # do not measure generalisation.
        'val': 'images/train',
        'names': {0: 'black_dot'},
        'download': False
    }

    yaml_path = data_path.parent / "black_dot_config.yaml"
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.dump(config, f, sort_keys=False)
    print(f"⚙️ 配置文件创建于: {yaml_path}")
    return yaml_path
# Detection post-processing: shape- and colour-based false-positive filter.
def postprocess_detections(results, frame, shape_threshold=0.7, color_threshold=0.1):
    """Drop detections whose image patch is not a round, dark, unsaturated blob.

    Each box is cropped from ``frame`` (clamped to the frame bounds) and
    scored for shape and colour; it is kept only when both scores exceed
    their thresholds. Boxes that are empty after clamping, or in which no
    contour is found, are dropped.

    Args:
        results: YOLO prediction results; only ``results[0]`` is processed.
        frame: The image the predictions were made on; converted with
            ``cv2.COLOR_BGR2HSV``/``COLOR_BGR2GRAY``, so BGR order is assumed.
        shape_threshold: Minimum shape score, in ``[0, 1]``.
        color_threshold: Minimum colour score, in ``[0, 1]``.

    Returns:
        The same ``results`` object with ``results[0].boxes`` replaced by
        the retained subset.
    """
    detections = results[0].boxes
    if detections is None:
        return results

    boxes = detections.xyxy.cpu().numpy()
    frame_h, frame_w = frame.shape[:2]
    hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    keep = []
    for x1, y1, x2, y2 in boxes:
        # Clamp to the frame: a negative slice start would wrap around and
        # select the wrong region.
        left, top = max(int(x1), 0), max(int(y1), 0)
        right, bottom = min(int(x2), frame_w), min(int(y2), frame_h)
        if right <= left or bottom <= top:
            keep.append(False)
            continue

        roi = frame[top:bottom, left:right]
        shape_score = _dot_shape_score(roi)
        if shape_score is None:
            keep.append(False)
            continue

        color_score = _dot_color_score(roi, hsv_frame[top:bottom, left:right])
        keep.append(shape_score > shape_threshold and color_score > color_threshold)

    # Index the existing container instead of rebuilding it, so that its
    # remaining columns and original-shape metadata stay intact.
    mask = torch.as_tensor(keep, dtype=torch.bool, device=detections.data.device)
    results[0].boxes = detections[mask]
    return results


def _dot_shape_score(roi):
    """Return a roundness score in [0, 1] for the dark blob in ``roi``, or None if no contour exists."""
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Inverted Otsu threshold: the target is darker than its surroundings,
    # so the dark pixels must become the foreground.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    largest = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(largest)
    perimeter = cv2.arcLength(largest, True)

    # Circularity 4*pi*A / P^2 equals 1 for an ideal circle.
    if area > 0 and perimeter > 0:
        circularity = min((4 * np.pi * area) / (perimeter ** 2), 1.0)
    else:
        circularity = 0.0

    # Convexity: every convexity defect costs 0.1, floored at 0.
    defects = None
    if len(largest) > 3:
        try:
            hull_indices = cv2.convexHull(largest, returnPoints=False)
            defects = cv2.convexityDefects(largest, hull_indices)
        except cv2.error:
            # Treated like "no defects reported" when OpenCV rejects the contour.
            defects = None
    convexity = 1.0 if defects is None else max(0.0, 1.0 - len(defects) / 10)

    return (circularity + convexity) / 2


def _dot_color_score(roi, hsv_roi):
    """Return a score in [0, 1] that grows as ``roi`` gets darker and less saturated."""
    blackness = 1.0 - float(np.mean(roi)) / 255
    color_purity = 1.0 - float(np.mean(hsv_roi[:, :, 1])) / 255
    return float(np.clip((blackness + color_purity) / 2, 0.0, 1.0))
# Training entry point with heavy augmentation.
def train_model_safe(config_path, **train_overrides):
    """Train YOLOv8n on the dataset described by ``config_path``.

    Args:
        config_path: Path to the dataset YAML file.
        **train_overrides: Optional keyword arguments that replace or extend
            the default training arguments defined below (for example
            ``epochs=50``). Without overrides the defaults are used as is.

    Returns:
        Path to ``weights/best.pt`` inside the run's save directory.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.

    Note:
        Any exception raised while loading the model or training is
        reported on stdout and the process exits with status 1.
    """
    config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"配置文件不存在: {config_path}")

    # NOTE(review): presumably meant to point Ultralytics at the dataset
    # root - confirm the installed version actually reads this variable.
    os.environ['ULTRALYTICS_DATASETS'] = str(config_path.parent)
    print("🚀 开始训练YOLOv8模型...")
    print(f" 使用配置文件: {config_path}")

    try:
        model = YOLO("yolov8n.pt")

        train_args = {
            'data': str(config_path),
            'epochs': 1000,
            'imgsz': 640,
            'batch': 8,
            'device': '0' if torch.cuda.is_available() else 'cpu',
            'project': "black_dot_detection",
            'name': f"exp_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            'exist_ok': True,
            'single_cls': True,
            'optimizer': 'Adam',
            # NOTE(review): 0.01 is the usual SGD value; verify it is
            # intended for Adam, or override via ``lr0=...``.
            'lr0': 0.01,
            'close_mosaic': 20,
            'augment': True,                # enable augmentation
            'degrees': 45.0,                # rotation range
            'translate': 0.2,               # translation range
            'scale': 0.8,                   # scaling range
            'shear': 10.0,                  # shear range
            'hsv_h': 0.015,                 # hue jitter
            'hsv_s': 0.7,                   # saturation jitter
            'hsv_v': 0.4,                   # value (brightness) jitter
            'flipud': 0.5,                  # vertical flip probability
            'fliplr': 0.5,                  # horizontal flip probability
            'mosaic': 1.0,                  # mosaic augmentation
            'mixup': 0.1,                   # mixup augmentation
            'copy_paste': 0.1,              # copy-paste augmentation
            'erasing': 0.4,                 # random erasing
            'auto_augment': 'randaugment',  # automatic augmentation policy
        }
        # Caller-supplied values win over the defaults above.
        train_args.update(train_overrides)

        results = model.train(**train_args)

        model_dir = Path(results.save_dir) / "weights" / "best.pt"
        print(f"🎉 训练完成! 最佳模型保存于: {model_dir}")
        return model_dir
    except Exception as e:
        print(f"❌ 训练失败: {str(e)}")
        print("排查建议:")
        print("1. 检查配置文件路径是否正确")
        print("2. 确保数据集包含至少100张图像")
        print("3. 尝试降低batch_size值")
        print("4. 更新ultralytics包: pip install --upgrade ultralytics")
        sys.exit(1)
# Script entry point: build (or reuse) the dataset, write its config, train.
if __name__ == "__main__":
    banner = "="*50
    print(banner)
    print("基于YOLOv8的黑色圆点检测系统(抗光圈干扰版)")
    print(banner)

    # The dataset lives next to this script.
    here = Path(__file__).parent.resolve()
    dataset_dir = here / "black_dot_dataset"

    # Stage 1: reuse an existing dataset directory, otherwise generate one.
    if dataset_dir.exists():
        print(f"\n[阶段1] 使用现有数据集: {dataset_dir}")
    else:
        print("\n[阶段1] 创建抗光圈干扰的训练数据集")
        dataset_dir = create_robust_dataset(dataset_dir, halo_prob=0.7)

    # Stage 2: write the dataset configuration.
    print("\n[阶段2] 创建数据集配置")
    config_path = create_dataset_config(dataset_dir)

    # Stage 3: train the model.
    print("\n[阶段3] 训练抗光圈干扰模型")
    model_path = train_model_safe(config_path)

    print("\n" + banner)
    print(f"处理完成! 最佳模型保存于: {model_path}")
    print(banner)
# 阅读代码,该代码能识别较模糊的目标吗?
# 最新发布