【yolov5s目标检测】opencv加载onnx模型在GPU上进行推理

最新推荐文章于 2025-09-25 17:26:37 发布

原创 · 最新推荐文章于 2025-09-25 17:26:37 发布 · 4.2k 阅读

35 ·

CC 4.0 BY-SA版权

文章标签：

#opencv #人脸识别 #图像识别 #计算机视觉 #ssd

本文探讨了使用Yolov5模型在GPU和CPU上的推理性能差异，发现GPU模式下模型能百分之百检测目标，而CPU模式下识别率显著降低。

推理截图

推理测试

源码：

main主程序：

#include "yolo.h"
#include <iostream>
#include<opencv2//opencv.hpp>
#include<math.h>


using namespace std;
using namespace cv;
using namespace dnn;


int main()
{
  string model_path = "yolov5sGPU.onnx";//CPU可用    release GPU  
  Yolo test;
  Net net;
  if (test.readModel(net, model_path, true))//false: CPU    true:GPU
  {
    cout << "read net ok!" << endl;
  }
  else {
    return -1;
  }
  bool vedio = true;
  if (vedio)
  {
    // 用 OpenCV 打开摄像头读取文件（你随便咋样获取图片都OK哪）
    cv::VideoCapture cap = cv::VideoCapture("test2.mp4");
    // 设置宽高 无所谓多宽多高后面都会通过一个算法转换为固定宽高的
    // 固定宽高值应该是你通过YoloV5训练得到的模型所需要的
    // 传入方式是构造 YoloV5 对象时传入 width 默认值为 640，height 默认值为 640
    //cap.set(cv::CAP_PROP_FRAME_WIDTH, 1000);
    //cap.set(cv::CAP_PROP_FRAME_HEIGHT, 800);


    cv::Mat frame;
    //生成随机颜色//
    vector<Scalar> color;
    srand(time(0));
    for (int i = 0; i < 80; i++) {//80种类
      int b = rand() % 256;
      int g = rand() % 256;
      int r = rand() % 256;
      color.push_back(Scalar(b, g, r));
    }
    //color.push_back(Scalar(0, 0, 255));
    while (cap.isOpened())
    {
      // 读取一帧
      cap.read(frame);


      if (frame.empty())
      {
        std::cout << "Read frame failed! or The End!" << std::endl;
        break;
      }
      vector<Output> result;
      if (test.Detect(frame, net, result))//返回true 检测到对象//
      {
        test.drawPred(frame, result, color);
      }
      else {
        cout << "Detect Failed!" << endl;// CPU模式 有很多识别不到。GPU模式都可识别
      }
      //resize(frame, frame, Size(960, 540));
      cv::imshow("result", frame);
      if (cv::waitKey(1) == 27) break;
    }
    cv::destroyWindow("result");
    return 0;
  }
  else//图片目录
  {
    //生成随机颜色//
    vector<Scalar> color;
    srand(time(0));
    for (int i = 0; i < 80; i++) {//80种类
      int b = rand() % 256;
      int g = rand() % 256;
      int r = rand() % 256;
      color.push_back(Scalar(b, g, r));
    }
    //color.push_back(Scalar(0, 0, 255));
    String folder_path = "./image";
    //String folder_path = "./test2";
    //String folder_path = "./test";
    std::vector<cv::String> file_names;
    cv::glob(folder_path, file_names);   //get file names




    for (int i = 0; i < file_names.size(); i++) {
      vector<Output> result;
      cv::Mat img;
      std::cout << file_names[i] << std::endl;
      img = cv::imread(file_names[i]);
      if (!img.data) {
        continue;
      }
      //resize(img, img, Size(956, 800));
      if (test.Detect(img, net, result))//返回true 检测到对象//
      {
        test.drawPred(img, result, color);
      }
      else {
        cout << "Detect Failed!" << endl;// CPU模式 有很多识别不到。GPU模式都可识别
      }
      //resize(img, img, Size(717, 600));//Size(956, 800)
      //resize(img, img, Size(1434, 1200));
      namedWindow("result", cv::WINDOW_AUTOSIZE);
      imshow("result", img);
      cv::waitKey(1000);//等待1秒//
    }
    //cv::destroyAllWindows();
    cv::destroyWindow("result");
    //system("pause");
  }


  return 0;
}

yolo.h

#pragma once
#include<iostream>
#include<opencv2/opencv.hpp>


#define YOLO_P6 false // whether to use the P6 model variant


/// One detection result produced by Yolo::Detect.
/// Members carry default initialisers so a default-constructed Output never
/// holds indeterminate values.
struct Output {
  int id = 0;               ///< class id of the detection (used as an index into the class-name list)
  float confidence = 0.0f;  ///< confidence of the detection
  cv::Rect box;             ///< bounding rectangle of the detection
};


/**
 * YOLOv5 detector built on the OpenCV DNN module.
 *
 * The class itself only stores the model configuration (input size, strides,
 * anchors, thresholds and class names); the cv::dnn::Net is owned by the
 * caller and passed into every call.
 */
class Yolo {
public:
  Yolo() = default;
  ~Yolo() = default;

  /// Loads the model at netPath into net and selects the CUDA or CPU backend.
  bool readModel(cv::dnn::Net& net, std::string& netPath, bool isCuda);

  /// Runs inference on SrcImg and stores the detections in output.
  bool Detect(cv::Mat& SrcImg, cv::dnn::Net& net, std::vector<Output>& output);

  /// Draws the boxes and labels of result onto img, coloured by class id.
  void drawPred(cv::Mat& img, std::vector<Output> result, std::vector<cv::Scalar> color);

private:
#if (defined YOLO_P6 && YOLO_P6 == true)
  // P6 configuration: four anchor rows, 1280x1280 input.
  const float netAnchors[4][6] = {
    { 19, 27, 44, 40, 38, 94 },
    { 96, 68, 86, 152, 180, 137 },
    { 140, 301, 303, 264, 238, 542 },
    { 436, 615, 739, 380, 925, 792 }
  };

  const int netWidth = 1280;   // input image width of the ONNX model
  const int netHeight = 1280;  // input image height of the ONNX model

  const int strideSize = 4;    // number of entries of netStride that are used
#else
  // Default configuration: three anchor rows, 640x640 input.
  const float netAnchors[3][6] = {
    { 10, 13, 16, 30, 33, 23 },
    { 30, 61, 62, 45, 59, 119 },
    { 116, 90, 156, 198, 373, 326 }
  };

  const int netWidth = 640;    // input image width of the ONNX model (640 for yolov5s.onnx)
  const int netHeight = 640;   // input image height of the ONNX model

  const int strideSize = 3;    // number of entries of netStride that are used
#endif // YOLO_P6

  // Strides of the detection scales; Detect() reads the first strideSize entries.
  const float netStride[4] = { 8, 16.0, 32, 64 };

  float boxThreshold = 0.25;    // minimum box (objectness) score for a candidate
  float classThreshold = 0.25;  // minimum best-class score for a candidate

  float nmsThreshold = 0.45;    // threshold passed to NMS
  float nmsScoreThreshold = boxThreshold * classThreshold;  // score threshold passed to NMS

  // Class names, indexed by class id.
  std::vector<std::string> className = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
  };
};

yolo.cpp

#include"yolo.h"
using namespace std;
using namespace cv;
using namespace cv::dnn;


/**
 * Loads a network from a model file and selects the inference backend.
 *
 * @param net      Receives the loaded network.
 * @param netPath  Path of the model file handed to cv::dnn::readNet.
 * @param isCuda   true: CUDA backend and target; false: default backend on the CPU.
 * @return true when the model was loaded; false when loading raised an
 *         exception or produced an empty network (the reason is written to stderr).
 */
bool Yolo::readModel(Net& net, string& netPath, bool isCuda = false) {
  try {
    net = readNet(netPath);
  }
  catch (const std::exception& e) {
    // Report why loading failed instead of failing silently.
    cerr << "readModel: cannot load \"" << netPath << "\": " << e.what() << endl;
    return false;
  }
  if (net.empty()) {
    cerr << "readModel: \"" << netPath << "\" produced an empty network" << endl;
    return false;
  }

  if (isCuda) {
    // CUDA
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); // DNN_TARGET_CUDA_FP16 is the half-precision alternative
  }
  else {
    // CPU
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  }
  return true;
}
/**
 * Runs the network on one image and appends the detections that survive
 * thresholding and non-maximum suppression to output.
 *
 * @param SrcImg  Input image; it is not modified.
 * @param net     Network previously prepared with readModel().
 * @param output  Detections of this call are appended here; box coordinates
 *                are scaled back to the size of the (possibly padded) input image.
 * @return true when this call produced at least one detection; false when
 *         nothing was detected, the image is empty, inference failed, or the
 *         network output is smaller than the configured layout requires.
 */
bool Yolo::Detect(Mat& SrcImg, Net& net, vector<Output>& output) {
  if (SrcImg.empty()) {
    return false;
  }

  const int col = SrcImg.cols;
  const int row = SrcImg.rows;
  const int maxLen = MAX(col, row);

  // Strongly non-square images are placed in the top-left corner of a black
  // square canvas so that the resize to the network input does not distort them.
  Mat netInputImg;
  if (maxLen > 1.2 * col || maxLen > 1.2 * row) {
    netInputImg = Mat::zeros(maxLen, maxLen, SrcImg.type());
    SrcImg.copyTo(netInputImg(Rect(0, 0, col, row)));
  }
  else {
    netInputImg = SrcImg.clone();
  }

  std::vector<cv::Mat> netOutputImg;
  try
  {
    Mat blob;
    blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(0, 0, 0), true, false);
    // Author's hint: if results are far off although everything else is set up
    // correctly, try a mean of Scalar(104, 117, 123) or Scalar(114, 114, 114).
    net.setInput(blob);
    // Author's note: fine in release builds; a debug build reported
    // 'initCUDABackend CUDA backend will fallback to the CPU implementation for the layer "_input"'.
    net.forward(netOutputImg, net.getUnconnectedOutLayersNames());
  }
  catch (const std::exception& e)
  {
    // Without a valid output there is nothing to decode; do not fall through.
    cerr << "Detect: inference failed: " << e.what() << endl;
    return false;
  }

  // Each output row is [x, y, w, h, box score, one score per class].
  const int net_width = static_cast<int>(className.size()) + 5;

  // Number of rows the decoder walks: 3 anchors per grid cell on every scale.
  size_t rowCount = 0;
  for (int stride = 0; stride < strideSize; stride++) {
    const int grid_x = (int)(netWidth / netStride[stride]);
    const int grid_y = (int)(netHeight / netStride[stride]);
    rowCount += static_cast<size_t>(3) * grid_x * grid_y;
  }

  // Refuse to read past the end of the output buffer.
  if (netOutputImg.empty() || netOutputImg[0].empty() ||
      netOutputImg[0].total() < rowCount * static_cast<size_t>(net_width)) {
    cerr << "Detect: unexpected network output size" << endl;
    return false;
  }

  std::vector<int> classIds;       // class id of every candidate
  std::vector<float> confidences;  // confidence of every candidate
  std::vector<cv::Rect> boxes;     // rectangle of every candidate
  const float ratio_h = (float)netInputImg.rows / netHeight;
  const float ratio_w = (float)netInputImg.cols / netWidth;

  float* pdata = reinterpret_cast<float*>(netOutputImg[0].data);
  for (size_t r = 0; r < rowCount; ++r, pdata += net_width) {
    const float box_score = pdata[4];  // probability that this row's box contains an object
    if (!(box_score >= boxThreshold)) {
      continue;
    }
    cv::Mat scores(1, static_cast<int>(className.size()), CV_32FC1, pdata + 5);
    Point classIdPoint;
    double max_class_score = 0.0;
    minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
    if (!(max_class_score >= classThreshold)) {
      continue;
    }
    // rect [x, y, w, h]: centre and size in network-input pixels
    const float x = pdata[0];
    const float y = pdata[1];
    const float w = pdata[2];
    const float h = pdata[3];
    const int left = static_cast<int>((x - 0.5 * w) * ratio_w);
    const int top = static_cast<int>((y - 0.5 * h) * ratio_h);
    classIds.push_back(classIdPoint.x);
    confidences.push_back(static_cast<float>(max_class_score) * box_score);
    boxes.push_back(Rect(left, top, int(w * ratio_w), int(h * ratio_h)));
  }

  // Non-maximum suppression removes redundant overlapping boxes with lower confidence.
  vector<int> nms_result;
  NMSBoxes(boxes, confidences, nmsScoreThreshold, nmsThreshold, nms_result);
  for (const int idx : nms_result) {
    Output result;
    result.id = classIds[idx];
    result.confidence = confidences[idx];
    result.box = boxes[idx];
    output.push_back(result);
  }

  // Report this call's outcome, independent of anything already stored in output.
  return !nms_result.empty();
}


/**
 * Draws every detection onto img: a rectangle plus a "name:confidence" label.
 *
 * @param img     Image to draw on (modified in place).
 * @param result  Detections to draw.
 * @param color   Colours indexed by class id. The index wraps around when the
 *                list is shorter than the id, and green is used when it is empty.
 */
void Yolo::drawPred(Mat& img, vector<Output> result, vector<Scalar> color) {
  // The same font settings are used for measuring and for drawing, so the
  // clamp below keeps the whole label inside the image.
  const int fontFace = FONT_HERSHEY_SIMPLEX;
  const double fontScale = 1.0;
  const int thickness = 2;

  for (const Output& det : result) {
    const size_t classIndex = static_cast<size_t>(det.id);
    const Scalar drawColor = color.empty() ? Scalar(0, 255, 0) : color[classIndex % color.size()];
    rectangle(img, det.box, drawColor, 2, 8);

    // Fall back to the numeric id when it has no entry in className.
    const bool knownClass = det.id >= 0 && classIndex < className.size();
    const string label = (knownClass ? className[classIndex] : to_string(det.id)) + ":" + to_string(det.confidence);

    int baseLine = 0;
    const Size labelSize = getTextSize(label, fontFace, fontScale, thickness, &baseLine);
    // Labels of boxes touching the top edge are pushed down by the text height.
    const int top = max(det.box.y, labelSize.height);
    putText(img, label, Point(det.box.x, top), fontFace, fontScale, drawColor, thickness);
  }
}

结论：

自己训练了一个单分类模型，同一个模型在GPU上推理能够实现百分之百检测到目标，在CPU上推理失败率很高。

参考：

https://blog.csdn.net/qq_45945548/article/details/121701492

https://github.com/doleron/yolov5-opencv-cpp-python

The End