yolo5工程化

最新推荐文章于 2025-05-28 20:02:01 发布

Dovy

最新推荐文章于 2025-05-28 20:02:01 发布

阅读量642

点赞数 2

CC 4.0 BY-SA版权

分类专栏： AI

本文链接：https://blog.youkuaiyun.com/Simplify_boy/article/details/114578942

AI 专栏收录该内容

6 篇文章

订阅专栏

本文详细介绍YOLOv5模型的使用与部署流程，包括模型结构、预训练模型下载、模型训练、模型转换为ONNX格式的方法及C++部署过程中的后处理技巧，并对比了YOLO系列模型。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

yolov5

模型使用

工程

git clone https://github.com/ultralytics/yolov5.git

预训练模型
首次运行时，attempt_download函数会下载对应的模型。模型参数配置在models/*.yaml里。
yolov5网络组成
Focus + Backbone + Detect
coco数据集训练
下载coco数据集，放在与yolov5工程同级的目录下。

python3 train.py --data coco.yaml --cfg yolov5s.yaml --weights '' --batch-size 64
                                         yolov5m                               40
                                         yolov5l                               24
                                         yolov5x                               16

模型转换部署

pytorch模型部署，一般会转化为onnx。下面来讲讲关于yolov5的转换过程。

Focus部分修改
./models/common.py中

class Focus(nn.Module):
    """Fold 2x2 spatial patches into the channel axis, then convolve.

    Turns ``x(b, c, h, w)`` into ``y(b, 4c, h/2, w/2)`` and feeds it to
    ``self.conv``. The fold is written with ``view``/``permute`` rather than
    strided slicing, because exporting the slice form was found to be
    problematic.

    The channel blocks are emitted in the same order as the slice form
    ``cat([x[..., ::2, ::2], x[..., 1::2, ::2],
    x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)``
    so that convolution weights trained with the slice form stay valid.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
        k: Kernel size forwarded to ``Conv``.
        s: Stride forwarded to ``Conv``.
        p: Padding forwarded to ``Conv``.
        g: Groups forwarded to ``Conv``.
        act: Activation flag forwarded to ``Conv``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # Retained so the module keeps the same attributes; forward() does not
        # call it.
        self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,h,w) -> y(b,4c,h/2,w/2)
        """Return ``self.conv`` applied to the space-to-depth folded input."""
        N, C, H, W = x.size()
        s = 2  # fold factor; H and W must both be divisible by it
        x = x.view(N, C, H // s, s, W // s, s)  # x(b,c,h/2,2,w/2,2)
        # Column offset (dim 5) goes before row offset (dim 3): the resulting
        # block order (row, col) = (0,0), (1,0), (0,1), (1,1) matches the
        # slice/cat form. permute(0, 3, 5, ...) would swap the middle blocks.
        x = x.permute(0, 5, 3, 1, 2, 4).contiguous()  # x(b,2,2,c,h/2,w/2)
        x = x.view(N, C * s * s, H // s, W // s)  # x(b,4c,h/2,w/2)
        return self.conv(x)

Detect部分修改
./models/yolo.py中

class Detect(nn.Module):
    stride = None  # strides computed during build
    export = True  # onnx export 此处改为True
    .......
    .......
    .......
    def forward(self, x):
        """Run the per-layer detection convs and, in inference, decode boxes.

        ``x`` is indexed per detection layer and is overwritten in place with
        each layer's conv output. ``self.export`` is OR-ed into
        ``self.training`` (and stays set afterwards), so when ``export`` is
        truthy the training path is taken and the undecoded conv outputs are
        returned.

        Returns:
            ``x`` when ``self.training`` is truthy; otherwise a tuple
            ``(torch.cat(z, 1), x)`` where ``z`` holds the decoded
            predictions of every layer reshaped to ``(bs, -1, self.no)``.
        """
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            # The exported output should keep the shape x(bs,255,grid_x,grid_y)
            x[i] = self.m[i](x[i])  # conv
            # Commented out here and moved into the inference branch below so
            # that it does not take part in the model conversion.
            # bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
                x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()  # moved here
                # Rebuild the cached grid when its size no longer matches the feature map.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

c++部署，后处理部分代码

// Apply the logistic function 1 / (1 + e^-x) in place to the first `length`
// float values of out's data buffer. Nothing is written when length <= 0.
void sigmoid(Mat *out, int length)
{
    float *const values = (float *)(out->data);
    int idx = 0;
    while (idx < length)
    {
        const float raw = values[idx];
        values[idx] = 1.0 / (1 + expf(-raw));
        ++idx;
    }
}

    // Post-processing fragment: decode the raw network outputs in `outs` into
    // candidate boxes, run non-maximum suppression, then filter and clamp the
    // surviving boxes. Each outs[n] is read as a planar float buffer indexed
    // as [anchor][channel][grid_y][grid_x]: channels 0-3 hold cx, cy, w, h,
    // channel 4 the objectness score and channels 5.. the per-class scores.
    // Generate proposals.
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
    int n = 0, q = 0, i = 0, j = 0, nout = this->classes.size() + 5, c = 0;
    for (n = 0; n < (int)outs.size(); n++) // one pass per output tensor
    {
        // feature map size
        int num_grid_x = (int)(this->inpWidth / this->stride_[n]);
        int num_grid_y = (int)(this->inpHeight / this->stride_[n]);

        // feature map area
        int area = num_grid_x * num_grid_y;

        // Apply the logistic (sigmoid) function to every prediction of this
        // output: three anchors per grid cell, each carrying four box
        // parameters, one objectness score and the class scores.
        this->sigmoid(&outs[n], 3 * nout * area);
        for (q = 0; q < 3; q++) // anchor index
        {
            const float anchor_w = this->anchors_[n][q * 2];
            const float anchor_h = this->anchors_[n][q * 2 + 1];
            // Start of the nout channel planes belonging to anchor q.
            float *pdata = (float *)outs[n].data + q * nout * area;
            for (i = 0; i < num_grid_y; i++)
            {
                for (j = 0; j < num_grid_x; j++)
                {
                    float box_score = pdata[4 * area + i * num_grid_x + j];
                    if (box_score > this->objThreshold)
                    {
                        float max_class_socre = 0, class_socre = 0;
                        int max_class_id = 0;
                        for (c = 0; c < this->classes.size(); c++) // find the best class score (class score * objectness)
                        {
                            class_socre = pdata[(c + 5) * area + i * num_grid_x + j]*box_score;
                            if (class_socre > max_class_socre)
                            {
                                max_class_socre = class_socre;
                                max_class_id = c;
                            }
                        }

                        if (max_class_socre > this->confThreshold)
                        {
                            // Decode the box in network-input pixels from the sigmoid outputs.
                            float cx = (pdata[i * num_grid_x + j] * 2.f - 0.5f + j) * this->stride_[n];        // center x
                            float cy = (pdata[area + i * num_grid_x + j] * 2.f - 0.5f + i) * this->stride_[n]; // center y
                            float w = powf(pdata[2 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_w;        // width
                            float h = powf(pdata[3 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_h;        // height

                            // Top-left corner scaled to frame coordinates (converted to int).
                            int left = (cx - 0.5 * w) * ratiow;
                            int top = (cy - 0.5 * h) * ratioh;

                            classIds.push_back(max_class_id);
                            confidences.push_back(max_class_socre);
                            boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));
                        }
                    }
                }
            }
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        Rect box = boxes[idx];
        // Skip boxes that are too small or whose width/height ratio is outside [0.2, 5].
        if (box.area() < min_area || float(box.width) / box.height > 5 || float(box.width) / box.height < 0.2)
        {
            continue;
        }
        // Clamp the box against the frame.
        // NOTE(review): the three assignments after box.x are joined by comma
        // operators into a single statement.
        // NOTE(review): `box` is a local copy and the visible fragment does not
        // use the clamped value afterwards - confirm against the full source.
     	box.x = box.x > 0 ? box.x : 0;
        box.y = box.y > 0 ? box.y : 0,
        box.width = box.x + box.width < frame.cols ? box.width : frame.cols - 1 - box.x,
        box.height = box.y + box.height < frame.rows ? box.height : frame.rows - 1 - box.y;
    }