yolov10-ncnn量化部署完整流程

流年瞬间

已于 2024-06-12 11:08:36 修改

阅读量3.4k

点赞数 26

CC 4.0 BY-SA版权

文章标签： YOLO python 开发语言 图像处理 视觉检测 人工智能

于 2024-06-07 14:46:31 首次发布

本文链接：https://blog.youkuaiyun.com/ws771216/article/details/139526507

一、模型训练

1.yolov10官方源码：https://github.com/THU-MIG/yolov10

2.训练自己的数据：修改data.yaml，类别数为80，并将第一个类别修改为自己的类别

3.运行训练脚本

# Train a detection model from yolov10n.pt with the custom dataset config on devices 2 and 3.
# Each option is written as a single key=value token (no space after '=').
yolo detect train model=yolov10n.pt data=ultralytics/cfg/datasets/cuoceng.yaml epochs=200 batch=128 device=2,3

4.训练完成得到pt模型文件

二、模型转ncnn，量化int8

1.创建export.py

from ultralytics import YOLO

# Weights file to convert (machine-specific path; adjust to your own training run).
WEIGHTS_PATH = "/data/wangshuai/warpyolo/yolov10-1/yolov10/runs/detect/train2/weights/best.pt"

# Keyword arguments handed unchanged to YOLO.export().
EXPORT_OPTIONS = {"format": "ncnn", "dynamic": True, "simplify": True, "opset": 13}

model = YOLO(WEIGHTS_PATH)

# Keep the value returned by export() so it can be inspected afterwards.
success = model.export(**EXPORT_OPTIONS)

2.运行脚本输出得到ncnn模型文件

3.修改模型网络结构

修改head.py中v10Detect类中的forward函数，修改如下：

class v10Detect(Detect):
    """YOLOv10 detection head whose forward pass is rewritten for ncnn export.

    ``forward`` performs no box decoding or post-processing: it returns the raw
    outputs of the one-to-one branches, one channels-last tensor per input
    feature map, so that decoding can be done by the inference code.
    """

    max_det = 300

    def __init__(self, nc=80, ch=()):
        """Build the class branch and the one-to-one copies of both branches.

        Args:
            nc: Number of classes.
            ch: Channel count of each input feature map.
        """
        super().__init__(nc, ch)
        c3 = max(ch[0], min(self.nc, 100))  # channels
        self.cv3 = nn.ModuleList(
            nn.Sequential(
                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
                nn.Conv2d(c3, self.nc, 1),
            )
            for x in ch
        )
        self.one2one_cv2 = copy.deepcopy(self.cv2)
        self.one2one_cv3 = copy.deepcopy(self.cv3)

    # Original forward(), kept for reference; it is replaced by the export-oriented
    # forward() below.
    #
    # def forward(self, x):
    #     one2one = self.forward_feat([xi.detach() for xi in x], self.one2one_cv2, self.one2one_cv3)
    #     if not self.export:
    #         one2many = super().forward(x)
    #
    #     if not self.training:
    #         one2one = self.inference(one2one)
    #         if not self.export:
    #             return {"one2many": one2many, "one2one": one2one}
    #         else:
    #             assert(self.max_det != -1)
    #             boxes, scores, labels = ops.v10postprocess(one2one.permute(0, 2, 1), self.max_det, self.nc)
    #             return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
    #     else:
    #         return {"one2many": one2many, "one2one": one2one}

    def forward(self, x):
        """Return the raw one-to-one head outputs for every feature map.

        For each input map, the class branch output (after sigmoid) and the box
        branch output are permuted with ``permute(0, 2, 3, 1)`` (dim 1 moved to
        the end) and concatenated along the last axis, class scores first.

        Args:
            x: Sequence of feature maps, one per detection scale. It is not
                modified.

        Returns:
            Tuple holding one contiguous tensor per entry of ``x``.
        """
        z = []  # inference output
        for i in range(len(x)):
            cls_scores = self.one2one_cv3[i](x[i]).permute(0, 2, 3, 1).sigmoid()
            box_feats = self.one2one_cv2[i](x[i]).permute(0, 2, 3, 1)
            # Class scores come first, box values after them, on the last axis.
            feat = torch.cat((cls_scores, box_feats), 3)
            z.append(feat.contiguous())
        return tuple(z)

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        super().bias_init()
        m = self  # self.model[-1]  # Detect() module
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
        for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
            a[-1].bias.data[:] = 1.0  # box
            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)

4.修改完成后重新运行export.py

三、ncnn模型推理

1.在ncnn中编译yolov10.cpp：修改模型文件路径（.param / .bin）、输入输出blob名称（in0、out0、out1、out2）以及输出图片保存路径

yolov10.cpp：

#include "layer.h"
#include "net.h"

#include "opencv2/opencv.hpp"

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <iostream>
#include <vector>

#define MAX_STRIDE 32

// One detection: bounding box, class index and confidence score.
// The scalar members have default values so that a default-constructed Object
// never carries indeterminate data.
struct Object
{
   cv::Rect_<float> rect; // bounding box (x, y, width, height)
   int label = 0;         // class index
   float prob = 0.f;      // confidence score
};

/**
 * Softmax over `length` values that also returns the expected index.
 *
 * The largest input is subtracted from every element before exponentiation.
 *
 * @param src    input scores, `length` floats
 * @param dst    output buffer; receives the `length` normalized probabilities
 * @param length number of elements in src and dst
 * @return the sum over i of i * dst[i]
 */
static float softmax(
   const float* src,
   float* dst,
   int length
)
{
   // Find the largest input value.
   float peak = -FLT_MAX;
   for (int k = 0; k < length; ++k)
   {
       peak = (src[k] > peak) ? src[k] : peak;
   }

   // Exponentiate the shifted inputs and accumulate the normalizer.
   float total = 0;
   for (int k = 0; k < length; ++k)
   {
       const float e = expf(src[k] - peak);
       dst[k] = e;
       total += e;
   }

   // Normalize in place and accumulate the index-weighted sum.
   float expectation = 0;
   for (int k = 0; k < length; ++k)
   {
       const float p = dst[k] / total;
       dst[k] = p;
       expectation += k * p;
   }
   return expectation;
}
/**
 * Limit a value to the range [min, max].
 *
 * @param val value to limit
 * @param min lower bound; returned whenever val is not greater than it
 * @param max upper bound; returned when val is greater than min but not less than max
 * @return val if min < val < max, otherwise the corresponding bound
 */
static float clamp(
   float val,
   float min = 0.f,
   float max = 1280.f
)
{
   if (!(val > min))
       return min;
   if (val < max)
       return val;
   return max;
}
static void non_max_suppression(
   std::vector<Object>& proposals,
   std::vector<Object>& results,
   int orin_h,
   int orin_w,
   float dh = 0,
   float dw = 0,
   float ratio_h = 1.0f,
   float ratio_w = 1.0f,
   float conf_thres = 0.25f,
   float iou_thres = 0.65f
)
{
   results.clear();

   for (auto& pro : proposals)
   {
float x0 = pro.rect.x;
       float y0 = pro.rect.y;
       float x1 = pro.rect.x + pro.rect.width;
       float y1 = pro.rect.y + pro.rect.height;
       float& score = pro.prob;
       int& label = pro.label;

       x0 = (x0 - dw) / ratio_w;
       y0 = (y0 - dh) / ratio_h;
       x1 = (x1 - dw) / ratio_w;
       y1 = (y1 - dh) / ratio_h;

       x0 = clamp(x0, 0.f, orin_w);
       y0 = clamp(y0, 0.f, orin_h);
       x1 = clamp(x1, 0.f, orin_w);
       y1 = clamp(y1, 0.f, orin_h);

       Object obj;
       obj.rect.x = x0;
       obj.rect.y = y0;
       obj.rect.width = x1 - x0;
       obj.rect.height = y1 - y0;
       obj.prob = score;
       obj.label = label;
       results.push_back(obj);
   }
}

/**
 * Decode one output blob into box proposals.
 *
 * The blob is walked as a grid: the channel index is used as the grid row and
 * the row index inside a channel as the grid column. Every grid cell is read
 * as (w - 4 * 16) class scores followed by four groups of 16 values; softmax()
 * turns each group into one distance (left, top, right, bottom) from the cell
 * centre, and the resulting box is multiplied by stride.
 *
 * @param stride         factor converting grid units to pixels
 * @param feat_blob      output blob of one detection scale
 * @param prob_threshold minimum best-class score for a cell to yield a proposal
 * @param objects        proposals are appended to this vector
 */
static void generate_proposals(
   int stride,
   const ncnn::Mat& feat_blob,
   const float prob_threshold,
   std::vector<Object>& objects
)
{
   const int reg_max = 16;
   float bins[reg_max]; // scratch buffer for softmax()

   const int values_per_cell = feat_blob.w;
   const int grid_rows = feat_blob.c;
   const int grid_cols = feat_blob.h;
   const int num_class = values_per_cell - 4 * reg_max;

   for (int gy = 0; gy < grid_rows; gy++)
   {
       const ncnn::Mat plane = feat_blob.channel(gy);

       for (int gx = 0; gx < grid_cols; gx++)
       {
           const float* cell = plane.row(gx);

           // Best class of this cell.
           int best_label = 0;
           float best_score = -FLT_MAX;
           for (int c = 0; c < num_class; c++)
           {
               if (cell[c] > best_score)
               {
                   best_score = cell[c];
                   best_label = c;
               }
           }

           if (!(best_score >= prob_threshold))
               continue;

           // The four distance groups start right after the class scores.
           const float* dist = cell + num_class;
           const float left = softmax(dist, bins, reg_max);
           const float top = softmax(dist + reg_max, bins, reg_max);
           const float right = softmax(dist + 2 * reg_max, bins, reg_max);
           const float bottom = softmax(dist + 3 * reg_max, bins, reg_max);

           // Corners relative to the cell centre, converted from grid units to pixels.
           const float x0 = (gx + 0.5f - left) * stride;
           const float y0 = (gy + 0.5f - top) * stride;
           const float x1 = (gx + 0.5f + right) * stride;
           const float y1 = (gy + 0.5f + bottom) * stride;

           Object obj;
           obj.rect.x = x0;
           obj.rect.y = y0;
           obj.rect.width = x1 - x0;
           obj.rect.height = y1 - y0;
           obj.label = best_label;
           obj.prob = best_score;
           objects.push_back(obj);
       }
   }
}

/**
 * Run the YOLOv10 ncnn model on one BGR image.
 *
 * The image is resized so that its longer side equals 640, padded to 640x640
 * with the value 114, scaled by 1/255 and fed to blob "in0". Proposals are
 * decoded from blobs "out0", "out1" and "out2" (strides 8, 16 and 32) and then
 * handed to non_max_suppression(), which maps them back to the source image.
 *
 * The model is loaded on every call. If the model files cannot be loaded the
 * process is terminated with exit(-1), after printing a message to stderr.
 *
 * @param bgr     input image in BGR channel order
 * @param objects output; holds the detections when the function returns 0
 * @return 0 on success, -1 if the image is empty or a blob could not be
 *         fed/extracted
 */
static int detect_yolov10(const cv::Mat& bgr, std::vector<Object>& objects)
{
   if (bgr.empty())
   {
       fprintf(stderr, "detect_yolov10: empty input image\n");
       return -1;
   }

   ncnn::Net yolov10;

   yolov10.opt.use_vulkan_compute = true;
   // yolov10.opt.use_bf16_storage = true;

   // Model files: the paths are machine-specific, adjust them before building.
   if (yolov10.load_param("/home/wangshuai/WS/new_kl/kl_ncnn_yolov5/model/model.ncnn-cuo1.param"))
   {
       fprintf(stderr, "detect_yolov10: failed to load the model param file\n");
       exit(-1);
   }
   if (yolov10.load_model("/home/wangshuai/WS/new_kl/kl_ncnn_yolov5/model/model.ncnn-cuo1.bin"))
   {
       fprintf(stderr, "detect_yolov10: failed to load the model bin file\n");
       exit(-1);
   }

   const int target_size = 640;
   const float prob_threshold = 0.25f;
   const float nms_threshold = 0.45f;

   const int img_w = bgr.cols;
   const int img_h = bgr.rows;

   // Resize so that the longer side becomes target_size, keeping the aspect ratio.
   int w = img_w;
   int h = img_h;
   float scale = 1.f;
   if (w > h)
   {
       scale = (float)target_size / w;
       w = target_size;
       h = h * scale;
   }
   else
   {
       scale = (float)target_size / h;
       h = target_size;
       w = w * scale;
   }
   // Truncation can give 0 for extremely elongated images; keep at least one pixel.
   if (w < 1)
       w = 1;
   if (h < 1)
       h = 1;

   ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

   // Pad to a target_size x target_size square.
   // Alternative kept from the original code (pad to a multiple of MAX_STRIDE):
   // int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
   // int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
   const int wpad = target_size - w;
   const int hpad = target_size - h;

   const int top = hpad / 2;
   const int bottom = hpad - hpad / 2;
   const int left = wpad / 2;
   const int right = wpad - wpad / 2;

   ncnn::Mat in_pad;
   ncnn::copy_make_border(in,
       in_pad,
       top,
       bottom,
       left,
       right,
       ncnn::BORDER_CONSTANT,
       114.f);

   // Scale pixel values by 1/255; no mean is subtracted.
   const float norm_vals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
   in_pad.substract_mean_normalize(0, norm_vals);

   ncnn::Extractor ex = yolov10.create_extractor();

   if (ex.input("in0", in_pad) != 0)
   {
       fprintf(stderr, "detect_yolov10: failed to set input blob in0\n");
       return -1;
   }

   // One output blob per detection scale.
   static const int strides[] = { 8, 16, 32 };
   static const char* const blob_names[] = { "out0", "out1", "out2" };
   const int num_outputs = sizeof(strides) / sizeof(strides[0]);

   std::vector<Object> proposals;
   for (int k = 0; k < num_outputs; k++)
   {
       ncnn::Mat out;
       if (ex.extract(blob_names[k], out) != 0)
       {
           fprintf(stderr, "detect_yolov10: failed to extract blob %s\n", blob_names[k]);
           return -1;
       }

       // generate_proposals() appends, so all scales accumulate in one vector.
       generate_proposals(strides[k], out, prob_threshold, proposals);
   }

   // Remove the letterbox offset and scale; the helper clears `objects` first.
   non_max_suppression(proposals, objects,
       img_h, img_w, top, left,
       scale, scale, prob_threshold, nms_threshold);

   return 0;
}

/**
 * Draw the detections on a copy of the image, save it and show it.
 *
 * Every detection is also printed to stderr. The annotated image is written to
 * a fixed, machine-specific path and displayed in a window until a key is
 * pressed.
 *
 * @param bgr     source image; it is cloned, not modified
 * @param objects detections to draw
 */
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
   static const char* class_names[] = {
       "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
       "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
       "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
       "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
       "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
       "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
       "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
       "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
       "hair drier", "toothbrush"
   };
   const int num_names = sizeof(class_names) / sizeof(class_names[0]);

   cv::Mat image = bgr.clone();

   for (size_t i = 0; i < objects.size(); i++)
   {
       const Object& obj = objects[i];

       fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
           obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

       cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

       // Fall back to the numeric label when it has no entry in class_names,
       // instead of reading past the end of the table.
       char text[256];
       if (obj.label >= 0 && obj.label < num_names)
           snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);
       else
           snprintf(text, sizeof(text), "%d %.1f%%", obj.label, obj.prob * 100);

       int baseLine = 0;
       cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

       // Place the caption above the box, pulled back inside the image if needed.
       int x = static_cast<int>(obj.rect.x);
       int y = static_cast<int>(obj.rect.y - label_size.height - baseLine);
       if (y < 0)
           y = 0;
       if (x + label_size.width > image.cols)
           x = image.cols - label_size.width;

       // White background behind the caption text.
       cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
           cv::Scalar(255, 255, 255), -1);

       cv::putText(image, text, cv::Point(x, y + label_size.height),
           cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
   }

   cv::imwrite("/home/wangshuai/WS/new_kl/kl_ncnn_yolov5/img_dir/output.jpg", image);
   cv::imshow("image", image);
   cv::waitKey(0);
}

/**
 * Command-line entry point: detect objects in one image and display the result.
 *
 * Usage: <program> [imagepath]
 *
 * @return 0 on success, -1 on a usage error, an unreadable image or a failed
 *         detection
 */
int main(int argc, char** argv)
{
   if (argc != 2)
   {
       fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
       return -1;
   }

   const char* imagepath = argv[1];

   cv::Mat m = cv::imread(imagepath, 1);
   // cv::resize(m, m, cv::Size(640,640));
   if (m.empty())
   {
       fprintf(stderr, "cv::imread %s failed\n", imagepath);
       return -1;
   }

   std::vector<Object> objects;
   // Do not draw or display anything when detection reports an error.
   if (detect_yolov10(m, objects) != 0)
   {
       fprintf(stderr, "detect_yolov10 failed for %s\n", imagepath);
       return -1;
   }

   draw_objects(m, objects);

   return 0;
}

2.编译环境

ncnn编译环境与yolov8-ncnn一致

3.推理验证

# Run the compiled example on one test image (both paths are relative to the current working directory).
./examples/yolov10 ../img_dir/test/cuoceng.jpg

检测框似乎有些重复，目前还不清楚原因，欢迎指正。