yolov5代码详解--3.python代码脚本

三炭先生
已于 2025-03-07 14:45:04 修改
阅读量326
点赞数 3
分类专栏： yolo算法文章标签： YOLO 算法 python
于 2025-03-07 14:43:13 首次发布
本文链接：https://blog.csdn.net/Tony_33/article/details/146072084
版权
yolo算法专栏收录该内容
8 篇文章
订阅专栏
三、val.py

val.py的主要作用是对训练好的模型进行验证（或评估）。具体来说，它用于在指定的验证集上评估模型的性能，计算各项评估指标，并输出结果。val.py通常在模型训练完成后运行，用于验证模型的检测精度、召回率、平均精度（mAP）等指标，以确保模型的泛化能力。
参数作用总结

data: 数据集的配置文件路径，通常为.yaml文件，定义了数据集的信息。可以调整以支持不同的数据集。

weights: 预训练模型的路径，可以选择不同的YOLOv5版本的权重文件。不同权重适应不同的任务，选择适当的权重文件。

batch_size: 每次推理的图像批处理大小，通常根据显存大小进行调整。较大的批次有助于加速推理，但需要更大的显存。

imgsz: 图像尺寸，影响推理速度和准确性。较大的图像尺寸通常会提高准确度，但会降低推理速度。

conf_thres: 置信度阈值，设置为较低的值（如0.001）可以提高检测到目标的概率，但可能会导致较多误检。可以根据需要调整。

iou_thres: NMS（非极大值抑制）的IoU阈值，控制去除重叠预测框的严格程度。与得分更高的框的IoU超过该阈值的预测框会被抑制；阈值越高，被抑制的框越少，保留的重叠框越多；阈值越低，抑制越严格，重叠框越少。

max_det: 每张图片的最大检测数。设置过高的值可能会导致不必要的计算，过低则可能漏检目标。

task: 任务类型，用于选择在数据集的哪个划分上进行评估（train、val 或 test），也可以设为 speed（速度基准测试）或 study（速度与mAP对比实验）。注意：val.py 本身不执行训练，train 仅表示在训练集上做评估。

device: 设备选择，指定使用的CUDA设备编号（如 0 或 0,1,2,3）或 cpu；留空时由程序自动选择。

single_cls: 如果数据集只包含单一类别物体，可以设置为True来简化模型训练与推理。

save_txt: 是否保存预测结果为txt文件。如果需要后续处理或分析，设置为True。

save_json: 是否保存为COCO格式的JSON文件，适用于COCO评估。
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Validate a trained YOLOv5 detection model on a detection dataset

Usage:
    $ python val.py --weights yolov5s.pt --data coco128.yaml --img 640
"""
# 引入必要的库
import argparse  # 用于命令行参数解析
import json  # 用于JSON文件的读写
import os  # 用于操作系统功能（路径、目录等）
import sys  # 用于系统操作，主要是调整模块路径
from pathlib import Path  # 用于路径操作

import numpy as np  # 用于处理数值运算
import torch  # 用于深度学习模型的运算
from tqdm import tqdm  # 用于显示进度条

# Resolve this file's location and derive the project root from it
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # directory containing this file, used as the YOLOv5 root
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # make ROOT importable (presumably so the models/utils imports below resolve)
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # re-express ROOT relative to the current working directory

# 导入自定义模块
from models.common import DetectMultiBackend  # YOLOv5模型推理处理
from utils.callbacks import Callbacks  # 回调函数，用于在各个阶段执行额外的操作
from utils.dataloaders import create_dataloader  # 数据加载器
from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_dataset, check_img_size, check_requirements,
                           check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression,
                           print_args, scale_boxes, xywh2xyxy, xyxy2xywh)  # 各种常用工具函数
from utils.metrics import ConfusionMatrix, ap_per_class, box_iou  # 性能评估函数
from utils.plots import output_to_target, plot_images, plot_val_study  # 可视化函数
from utils.torch_utils import select_device, smart_inference_mode  # 深度学习设备选择与推理模式

# Save one image's predicted boxes as text
def save_one_txt(predn, save_conf, shape, file):
    """Append the detections of one image to a text file, one detection per line.

    Each row of ``predn`` is unpacked as ``(*xyxy, conf, cls)``. The box is
    converted with ``xyxy2xywh`` and divided by the ``(w, h, w, h)`` gain taken
    from ``shape`` (indexed as ``shape[[1, 0, 1, 0]]``, i.e. ``shape`` is
    expected to be ``(height, width)``).

    Args:
        predn: 2-D tensor of detections whose rows unpack as 4 box values, a
            confidence and a class.
        save_conf: When true, the confidence is appended to every line.
        shape: Image shape used to normalise the box coordinates.
        file: Path of the text file to append to.

    Returns:
        None. The file is opened in append mode once; it is not touched at all
        when ``predn`` has no rows.
    """
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    lines = []
    for *xyxy, conf, cls in predn.tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
        lines.append(('%g ' * len(line)).rstrip() % line + '\n')
    if lines:  # open the file once instead of once per detection; skip it entirely when empty
        with open(file, 'a') as f:
            f.writelines(lines)

# Collect one image's predicted boxes as COCO-style JSON records
def save_one_json(predn, jdict, path, class_map):
    """Append one dict per detection of a single image to ``jdict``.

    Args:
        predn: 2-D tensor of detections; columns 0-3 are passed to
            ``xyxy2xywh``, column 4 is stored as the score and column 5 is
            used as an index into ``class_map``.
        jdict: List that receives the records (mutated in place).
        path: ``Path`` of the image; its stem becomes ``image_id`` (as an int
            when the stem is numeric, otherwise as the string itself).
        class_map: Sequence mapping the predicted class index to the
            ``category_id`` written to the record.
    """
    stem = path.stem
    image_id = int(stem) if stem.isnumeric() else stem
    xywh_boxes = xyxy2xywh(predn[:, :4])  # xywh
    xywh_boxes[:, :2] -= xywh_boxes[:, 2:] / 2  # shift xy from box centre to top-left corner
    jdict.extend(
        {
            'image_id': image_id,
            'category_id': class_map[int(det[5])],
            'bbox': [round(value, 3) for value in xywh],
            'score': round(det[4], 5)}
        for det, xywh in zip(predn.tolist(), xywh_boxes.tolist()))

# Match detections against ground-truth labels
def process_batch(detections, labels, iouv):
    """Build the boolean "correct detection" matrix for one image.

    Args:
        detections: 2-D tensor; columns 0-3 are the boxes handed to
            ``box_iou`` and column 5 is the predicted class.
        labels: 2-D tensor; column 0 is the class and columns 1+ are the boxes
            handed to ``box_iou``.
        iouv: 1-D tensor of IoU thresholds.

    Returns:
        Bool tensor of shape ``(len(detections), len(iouv))`` on
        ``iouv.device``. Entry ``[d, k]`` is True when detection ``d`` was
        matched to a label of the same class with IoU >= ``iouv[k]``.
    """
    hits = np.zeros((detections.shape[0], iouv.shape[0]), dtype=bool)
    overlap = box_iou(labels[:, 1:], detections[:, :4])  # rows follow labels, columns follow detections
    same_class = labels[:, 0:1] == detections[:, 5]
    for col, threshold in enumerate(iouv):
        label_idx, det_idx = torch.where((overlap >= threshold) & same_class)  # IoU >= threshold and classes agree
        n_pairs = label_idx.shape[0]
        if not n_pairs:
            continue
        pairs = torch.cat((torch.stack((label_idx, det_idx), 1), overlap[label_idx, det_idx][:, None]),
                          1).cpu().numpy()  # columns: [label, detection, iou]
        if n_pairs > 1:
            pairs = pairs[pairs[:, 2].argsort()[::-1]]  # order by IoU, highest first
            pairs = pairs[np.unique(pairs[:, 1], return_index=True)[1]]  # keep one pair per detection
            pairs = pairs[np.unique(pairs[:, 0], return_index=True)[1]]  # keep one pair per label
        hits[pairs[:, 1].astype(int), col] = True
    return torch.tensor(hits, dtype=torch.bool, device=iouv.device)

# Main validation routine
@smart_inference_mode()  # decorator from utils.torch_utils (presumably disables autograd during validation — confirm)
def run(
        data,  # dataset config; run through check_dataset when no model is passed, otherwise used as a dict
        weights=None,  # path(s) of the trained model weights
        batch_size=32,  # batch size
        imgsz=640,  # inference image size (pixels)
        conf_thres=0.001,  # confidence threshold passed to NMS
        iou_thres=0.6,  # IoU threshold passed to NMS
        max_det=300,  # maximum detections kept per image
        task='val',  # dataset split to evaluate: train, val or test (anything else falls back to val)
        device='',  # device string handed to select_device
        workers=8,  # max dataloader workers
        single_cls=False,  # treat the dataset as single-class
        augment=False,  # augmented inference
        verbose=False,  # log per-class metrics
        save_txt=False,  # save detections to *.txt
        save_hybrid=False,  # pass the ground-truth labels to NMS together with the predictions
        save_conf=False,  # include confidences in the saved *.txt lines
        save_json=False,  # save a COCO-format JSON results file
        project=ROOT / 'runs/val',  # results are saved to project/name
        name='exp',  # results are saved to project/name
        exist_ok=False,  # existing project/name is ok, do not increment
        half=True,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        model=None,  # in-memory model; when given, model loading and dataloader creation are skipped
        dataloader=None,  # dataloader to iterate when a model is passed in
        save_dir=Path(''),  # output directory (recomputed from project/name when no model is passed)
        plots=True,  # save plots (batch images, confusion matrix)
        callbacks=Callbacks(),  # callback registry fired at batch/image/run end
        compute_loss=None,  # optional loss function; when set, its output is accumulated over batches
):
    """Validate a detection model on a dataset split and return its metrics.

    Two calling modes exist, selected by ``model``:

    * ``model`` is given: the model and ``dataloader`` are used as passed, the
      device is taken from the model parameters and ``data`` is used as a dict.
    * ``model`` is None: the device is selected, an output directory is created
      under ``project/name``, the model is loaded from ``weights`` through
      ``DetectMultiBackend``, ``data`` is checked with ``check_dataset`` and a
      dataloader is created for ``data[task]``.

    Returns:
        A 3-tuple ``(results, maps, t)`` where ``results`` is
        ``(mp, mr, map50, map, *loss)`` with ``loss`` being the three
        accumulated loss components divided by ``len(dataloader)``; ``maps`` is
        a numpy array of length ``nc`` filled with ``map`` and overwritten with
        the per-class AP where available; ``t`` is a tuple with the
        pre-process, inference and NMS time per image in milliseconds.

    Raises:
        AssertionError: if a PyTorch model's class count differs from the
            dataset's ``nc`` (standalone mode, ``single_cls`` False).
    """
    # Initialize or load the model
    training = model is not None  # True when an in-memory model was passed in
    if training:  # model supplied by the caller
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # device comes from the model
        half &= device.type != 'cpu'  # half precision is only kept on non-CPU devices
        model.half() if half else model.float()
    else:  # standalone: load everything from disk
        device = select_device(device, batch_size=batch_size)

        # Create the output directory
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment the run directory name
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load the model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # validate the image size against the model stride
        half = model.fp16  # use whatever precision the backend actually reports
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1  # non-PyTorch backends are run one image at a time
                LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')

        # Check the dataset
        data = check_dataset(data)

    # Configure
    model.eval()
    cuda = device.type != 'cpu'
    is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt')  # COCO val set
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # IoU thresholds 0.5 ... 0.95 (10 values)
    niou = iouv.numel()  # number of IoU thresholds; sizes the per-image "correct" matrix below

    # Dataloader
    if not training:
        if pt and not single_cls:  # check that the weights were trained on the same number of classes as --data
            ncm = model.model.nc
            assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \
                              f'classes). Pass correct combination of --weights and --data that are trained together.'
        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
        task = task if task in ('train', 'val', 'test') else 'val'  # any other task evaluates the val split
        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, workers=workers)[0]

    # Run detection
    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = model.names if hasattr(model, 'names') else model.module.names  # class names
    if isinstance(names, (list, tuple)):  # normalise to a dict so names.values() below works for list-style names
        names = dict(enumerate(names))
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))  # class index -> category id for JSON
    # Header line shown on the progress bar
    s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95')
    tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    dt = Profile(), Profile(), Profile()  # timers: pre-process, inference, NMS
    loss = torch.zeros(3, device=device)  # accumulated loss components
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)  # progress bar
    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        callbacks.run('on_val_batch_start')

        # Pre-process
        with dt[0]:
            if cuda:
                im = im.to(device, non_blocking=True)
                targets = targets.to(device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            nb, _, height, width = im.shape  # batch size, channels, height, width

        # Inference
        with dt[1]:
            preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None)

        # Loss (only when a loss function was supplied)
        if compute_loss:
            loss += compute_loss(train_out, targets)[1]  # accumulate element [1] of the loss function's output

        # NMS
        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # scale target boxes to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # labels for hybrid mode
        with dt[2]:
            preds = non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det)

        # Per-image metrics
        for si, pred in enumerate(preds):
            labels = targets[targets[:, 0] == si, 1:]  # targets belonging to image si
            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, number of predictions
            path, shape = Path(paths[si]), shapes[si][0]
            correct = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # default: nothing matched
            seen += 1

            if npr == 0:
                if nl:
                    stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0]))
                    if plots:
                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0  # force every prediction to class 0
            predn = pred.clone()
            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # boxes back to original image space

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])  # target boxes to original image space
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # class + box per label
                correct = process_batch(predn, labelsn, iouv)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0]))  # (correct, conf, pred class, target class)

            # Save/log
            if save_txt:
                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
            if save_json:
                save_one_json(predn, jdict, path, class_map)  # append to the COCO-JSON list

            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])

        # Plot the first three batches
        if plots and batch_i < 3:
            plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
            plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred

        callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds)

    # Compute metrics
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP at the first threshold (0.5), AP averaged over all thresholds
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
    nt = np.bincount(stats[3].astype(int), minlength=nc)  # number of targets per class

    # Print results
    pf = '%22s' + '%11i' * 2 + '%11.3g' * 4  # print format
    LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
    if nt.sum() == 0:
        LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels')

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x.t / seen * 1E3 for x in dt)  # milliseconds per image for each timer
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')

        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        # Evaluate with pycocotools; this is best-effort, so any failure is logged rather than raised
        try:
            check_requirements('pycocotools')
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # replace the results with pycocotools' (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            LOGGER.info(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # back to FP32 (the model may have been switched to half above)
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    maps = np.zeros(nc) + map  # default every class to the overall mAP
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t

def parse_opt():
    """Parse the command-line options for validation.

    After parsing, ``opt.data`` is replaced by the result of ``check_yaml``,
    ``opt.save_json`` is switched on when that path ends with ``coco.yaml``,
    and ``opt.save_txt`` is switched on when ``--save-hybrid`` was given. The
    final options are printed with ``print_args`` and returned.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Dataset, weights and inference settings
    add('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    add('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    add('--batch-size', type=int, default=32, help='batch size')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    add('--conf-thres', type=float, default=0.001, help='confidence threshold')
    add('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
    add('--max-det', type=int, default=300, help='maximum detections per image')
    add('--task', default='val', help='train, val, test, speed or study')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')

    # Boolean switches controlling evaluation and what gets saved
    for flag, description in (
            ('--single-cls', 'treat as single-class dataset'),
            ('--augment', 'augmented inference'),
            ('--verbose', 'report mAP by class'),
            ('--save-txt', 'save results to *.txt'),
            ('--save-hybrid', 'save label+prediction hybrid results to *.txt'),
            ('--save-conf', 'save confidences in --save-txt labels'),
            ('--save-json', 'save a COCO-JSON results file'),
    ):
        add(flag, action='store_true', help=description)

    # Output location
    add('--project', default=ROOT / 'runs/val', help='save to project/name')
    add('--name', default='exp', help='save to project/name')

    # Remaining boolean switches
    for flag, description in (
            ('--exist-ok', 'existing project/name ok, do not increment'),
            ('--half', 'use FP16 half-precision inference'),
            ('--dnn', 'use OpenCV DNN for ONNX inference'),
    ):
        add(flag, action='store_true', help=description)

    opt = parser.parse_args()

    opt.data = check_yaml(opt.data)  # validate/resolve the dataset YAML path
    if opt.data.endswith('coco.yaml'):
        opt.save_json = True  # COCO runs always write the JSON results file
    if opt.save_hybrid:
        opt.save_txt = True  # hybrid results are written as *.txt, so txt saving is implied

    print_args(vars(opt))
    return opt

def main(opt):
    """Dispatch on ``opt.task``: a plain validation run, a speed benchmark or a speed/mAP study.

    * ``train`` / ``val`` / ``test``: call ``run`` once with the parsed options.
    * ``speed``: call ``run`` once per weights file with conf 0.25, IoU 0.45,
      no JSON output and no plots.
    * ``study``: for every weights file, call ``run`` at image sizes 256..1536
      in steps of 128, save the results to ``study_<data>_<weights>.txt``, then
      zip the text files and plot them.

    Any other task value does nothing beyond the requirement check and
    the ``opt.half`` update.
    """
    check_requirements(exclude=('tensorboard', 'thop'))

    # Normal run on a dataset split
    if opt.task in ('train', 'val', 'test'):
        if opt.conf_thres > 0.001:
            LOGGER.info(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
        if opt.save_hybrid:
            LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone')
        run(**vars(opt))
        return

    # Benchmarks
    weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]  # always iterate over a list
    opt.half = torch.cuda.is_available() and opt.device != 'cpu'  # FP16 whenever CUDA is available and not forced to cpu

    if opt.task == 'speed':  # speed benchmark
        opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
        for weight in weights:
            opt.weights = weight
            run(**vars(opt), plots=False)

    elif opt.task == 'study':  # speed vs mAP benchmark
        for weight in weights:
            opt.weights = weight
            f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt'  # results file for this weights file
            x, y = list(range(256, 1536 + 128, 128)), []  # x: image sizes, y: one result row per size
            for size in x:
                opt.imgsz = size
                LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...')
                r, _, t = run(**vars(opt), plots=False)
                y.append(r + t)  # metrics followed by timings
            np.savetxt(f, y, fmt='%10.4g')
        os.system('zip -r study.zip study_*.txt')
        plot_val_study(x=x)

# Script entry point: parse the command-line options, then hand them to main()
if __name__ == "__main__":
    opt = parse_opt()  # parse command-line arguments
    main(opt)  # run the task selected by opt.task