YOLOv8 Detection Multi-Batch TensorRT Inference (Python)

My current project needs to run inference on four images at a time, so the demo below uses a batch of 4. The same pattern extends to any N×C×H×W batch size. The inference code is as follows:

from models import TRTModule  # isort:skip
import argparse
from pathlib import Path
from typing import Tuple, Union

import cv2
import numpy as np
import torch
from numpy import ndarray
from torch import Tensor

from config import CLASSES, COLORS
from models.utils import letterbox, path_to_list


def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than iou_threshold with another (higher scoring)
    box.

    If multiple boxes have the exact same score and satisfy the IoU
    criterion with respect to a reference box, the selected box is
    not guaranteed to be the same between CPU and GPU. This is similar
    to the behavior of argsort in PyTorch when repeated values are present.

    Args:
        boxes (Tensor[N, 4]): boxes to perform NMS on. They
            are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and
            ``0 <= y1 < y2``.
        scores (Tensor[N]): scores for each one of the boxes
        iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold

    Returns:
        Tensor: int64 tensor with the indices of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)


def blob1(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]:
    """HWC uint8 image -> CHW float32 blob in [0, 1] (no batch axis)."""
    seg = None
    if return_seg:
        seg = im.astype(np.float32) / 255
    im = im.transpose([2, 0, 1])
    # Unlike the original `blob`, no batch axis is added here;
    # the caller stacks N images into an (N, C, H, W) tensor itself.
    im = np.ascontiguousarray(im).astype(np.float32) / 255
    if return_seg:
        return im, seg
    return im


def det_postprocess1(data: Tuple[Tensor, Tensor, Tensor, Tensor],
                     i: int) -> Tuple[Tensor, Tensor, Tensor]:
    """Post-process the i-th image of a batched (num_dets, bboxes, scores, labels) output."""
    assert len(data) == 4
    iou_thres: float = 0.65
    num_dets, bboxes, scores, labels = data[0][i], data[1][i], data[2][
        i], data[3][i]
    nums = num_dets.item()
    if nums == 0:
        return bboxes.new_zeros((0, 4)), scores.new_zeros(
            (0, )), labels.new_zeros((0, ))
    # Drop the zero-padded slots first, so NMS only sees valid detections.
    bboxes = bboxes[:nums]
    scores = scores[:nums]
    labels = labels[:nums]
    # Wrap any negative scores (seen with some engines) back into [0, 1].
    scores[scores < 0] = 1 + scores[scores < 0]
    # Extra NMS pass over the kept boxes.
    idx = nms(bboxes, scores, iou_thres)
    bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx]

    return bboxes, scores, labels


def main(args: argparse.Namespace) -> None:
    device = torch.device(args.device)
    Engine = TRTModule(args.engine, device)
    H, W = Engine.inp_info[0].shape[-2:]

    # set desired output names order
    Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])

    images = path_to_list(args.imgs)
    save_path = Path(args.out_dir)

    if not args.show:
        save_path.mkdir(parents=True, exist_ok=True)

    draw_list = []
    save_path_list = []
    img_list = []
    ratio_list = []
    dwdh_list = []
    for image in images:
        save_image = save_path / image.name
        bgr = cv2.imread(str(image))
        draw_list.append(bgr)
        save_path_list.append(save_image)

        bgr, ratio, dwdh = letterbox(bgr, (W, H))

        ratio_list.append(ratio)

        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor = blob1(rgb, return_seg=False)
        # (dw, dh) -> (dw, dh, dw, dh) so the padding offset can be
        # subtracted from xyxy boxes directly.
        dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
        dwdh_list.append(dwdh)
        tensor = torch.asarray(tensor, device=device)
        img_list.append(tensor)
    # Stack the preprocessed images into an (N, 3, H, W) batch; N just has
    # to match the batch size the engine was built with.
    batch_tensor = torch.stack(img_list, dim=0)
    print(batch_tensor.shape)
    data = Engine(batch_tensor)
    print(data)

    # data[0] is num_dets with shape (N, 1): iterate over the batch.
    for i in range(len(data[0])):
        bboxes, scores, labels = det_postprocess1(data, i)
        # Undo the letterbox transform: subtract the padding offset,
        # then rescale to the original image size.
        bboxes -= dwdh_list[i]
        bboxes /= ratio_list[i]

        for (bbox, score, label) in zip(bboxes, scores, labels):
            bbox = bbox.round().int().tolist()
            cls_id = int(label)
            cls = CLASSES[cls_id]
            color = COLORS[cls]
            cv2.rectangle(draw_list[i], bbox[:2], bbox[2:], color, 2)
            cv2.putText(draw_list[i],
                        f'{cls}:{score:.3f}', (bbox[0], bbox[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.75, [225, 255, 255],
                        thickness=2)
        if args.show:
            cv2.imshow('result', draw_list[i])
            cv2.waitKey(0)
        else:
            cv2.imwrite(str(save_path_list[i]), draw_list[i])


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, default='yolov8n_bach4.engine', help='Engine file')
    parser.add_argument('--imgs', type=str, default='data', help='Image file or folder')
    parser.add_argument('--show',
                        action='store_true',
                        help='Show the detection results instead of saving them')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='TensorRT infer device')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    main(args)
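
For completeness: this demo appears to sit on top of the YOLOv8-TensorRT project (TRTModule, letterbox and friends), and it expects an engine exported with a static batch of 4 plus the fused-NMS detection head. A minimal sketch of how such an engine might be produced; the export-det.py flags follow that repo's README and may vary between versions, trtexec is the stock TensorRT CLI, and the file names are only examples:

python3 export-det.py --weights yolov8n.pt --input-shape 4 3 640 640 --sim
trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n_bach4.engine --fp16

With the engine built, point the script (saved here as infer_det_batch.py, a placeholder name) at a folder containing exactly four images, since the loop stacks everything it finds:

python3 infer_det_batch.py --engine yolov8n_bach4.engine --imgs data --out-dir ./output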

Model input shape:

torch.Size([4, 3, 640, 640])


Model output:

(tensor([[5],
        [5],
        [3],
        [3]], device='cuda:0', dtype=torch.int32), tensor([[[477.7500, 224.0000, 560.0000, 521.5000],
         [211.2500, 241.5000, 283.5000, 507.0000],
         [109.8750, 235.8750, 224.6250, 536.0000],
         ...,
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000]],

        [[477.7500, 224.0000, 560.0000, 521.5000],
         [211.2500, 241.5000, 283.5000, 507.0000],
         [109.8750, 235.8750, 224.6250, 536.0000],
         ...,
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000]],

        [[373.5000, 160.6250, 572.0000, 494.7500],
         [ 59.7500, 238.1250, 555.0000, 495.2500],
         [218.5000, 358.5000, 261.7500, 497.0000],
         ...,
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000]],

        [[373.5000, 160.6250, 572.0000, 494.7500],
         [ 59.7500, 238.1250, 555.0000, 495.2500],
         [218.5000, 358.5000, 261.7500, 497.0000],
         ...,
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   0.0000]]], device='cuda:0'), tensor([[0.9014, 0.8833, 0.8755, 0.8418, 0.4358, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.9014, 0.8833, 0.8755, 0.8418, 0.4358, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.8052, 0.7959, 0.3684, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.8052, 0.7959, 0.3684, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000]], device='cuda:0'), tensor([[ 0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0, 27,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0, 27,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0]], device='cuda:0',
       dtype=torch.int32))

After batched inference, the output shapes of num_dets, bboxes, scores and labels change as follows (single image -> batch of 4):

num_dets: [1] -> [4, 1]

bboxes: [100, 4] -> [4, 100, 4]

scores: [100] -> [4, 100]

labels: [100] -> [4, 100]
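
As a quick sanity check, the four outputs can be unpacked straight from the tuple the engine returns. This is a minimal sketch reusing the Engine and batch_tensor objects from the script above; the 100 is the top-k/max-detections value baked into the engine at export time, so it may differ for other engines:

num_dets, bboxes, scores, labels = Engine(batch_tensor)
print(num_dets.shape)  # torch.Size([4, 1])
print(bboxes.shape)    # torch.Size([4, 100, 4])
print(scores.shape)    # torch.Size([4, 100])
print(labels.shape)    # torch.Size([4, 100])

# Per-image results are the i-th slice along dim 0; only the first
# num_dets[i] entries of each slice are valid detections.
for i in range(num_dets.shape[0]):
    n = int(num_dets[i].item())
    valid_boxes = bboxes[i, :n]  # (n, 4) xyxy boxes for image i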

The inference result images are shown below:

I will add the TensorRT C++ inference part later when I find time.


The full project can be downloaded here:
https://download.youkuaiyun.com/download/weixin_59701401/89767509
