首先分享两个网络结构可视化工具。Netscope:打开下面的网页,将 Caffe 的 .prototxt 文件拖到左侧空白区域即可。
http://ethereon.github.io/netscope/#/editor
Netron 支持主流各种框架的模型结构可视化,下面给出 GitHub 链接:
https://github.com/lutzroeder/Netron
YOLOv3 在性能上做过优化,用来做人脸和其它目标识别效果非常好。YOLOv3 在 Caffe 等框架下都支持训练,但在部署时需要自己编写推理(检测)部分。检测部分的代码网上资料较少,这里给出一个框架,目标是将 YOLOv3 移植到嵌入式 C++ 平台。下面直接给出检测部分,希望能帮到大家。
// specific language governing permissions and limitations under the License.
#include <opencv2/opencv.hpp>
#include "detection.h"
#include <fstream>
#include <iostream>
#include <algorithm>
#include <math.h>
#include <string>
#include "cpu.h"
namespace mace {
namespace yolov3{
// The 80 COCO class names, in the network's training order; index with the
// class index produced by the detection head.
std::string coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
// Maps the contiguous class index (0..79) to the official COCO category id;
// the official numbering has gaps (e.g. 12, 26, 29-30 are absent), hence the skips.
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
// Construct an Option with the library defaults: lightweight mode on, a fixed
// pool of 4 worker threads, and no custom allocators.
Option::Option()
    : lightmode(true),
      // NOTE: a dynamic value via get_cpu_count() was tried and left disabled;
      // the thread count is currently pinned to 4.
      num_threads(4),
      blob_allocator(0),
      workspace_allocator(0)
{
}
// Process-wide default configuration; replaced via set_default_option().
static Option g_default_option;
// Returns a read-only reference to the shared default Option.
// NOTE(review): not synchronized — presumably configured once at startup
// before worker threads run; confirm against callers.
const Option& get_default_option()
{
return g_default_option;
}
// Installs `opt` as the process-wide default configuration.
// Returns 0 on success, -1 (with a message on stderr) if the option is
// rejected because its thread count is not positive.
int set_default_option(const Option& opt)
{
    const bool thread_count_valid = opt.num_threads > 0;
    if (!thread_count_valid)
    {
        fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
        return -1;
    }

    g_default_option = opt;
    return 0;
}
// Default-construct the detection-output layer; all detection parameters
// (thresholds, anchors, strides) are currently hard-coded in forward_nhwc().
Yolov3DetectionOutput::Yolov3DetectionOutput()
{
}
// Destructor: the layer owns no resources; the softmax cleanup below was
// intentionally left disabled.
Yolov3DetectionOutput::~Yolov3DetectionOutput()
{
//delete softmax;
}
int label;
// Area of the overlap between two axis-aligned boxes; 0 when they are
// disjoint or merely touch along an edge.
static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)
{
    const float overlap_w = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
    const float overlap_h = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);

    // A non-positive extent on either axis means no shared area.
    if (overlap_w <= 0.f || overlap_h <= 0.f)
        return 0.f;

    return overlap_w * overlap_h;
}
// Sort scores[left..right] into descending order in place, applying the same
// permutation to datas so the two arrays stay paired.
//
// Bug fixed: the previous partition used `while (i < j)` with ad-hoc
// `if(left==middle)break;` / `if(i==j){i++;j--;}` escapes. For a two-element
// range that is already descending (e.g. scores {2,1}) the break left `j` on
// an element smaller than the pivot and the unconditional swap inverted the
// pair, producing ascending output. This is the standard Hoare partition,
// which handles those ranges correctly.
template <typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores, int left, int right)
{
    int i = left;
    int j = right;
    const float pivot = scores[(left + right) / 2];

    while (i <= j)
    {
        // Advance past elements already on the correct side of the pivot.
        while (scores[i] > pivot)
            i++;
        while (scores[j] < pivot)
            j--;

        if (i <= j)
        {
            // Swap the out-of-place pair, keeping datas aligned with scores.
            std::swap(datas[i], datas[j]);
            std::swap(scores[i], scores[j]);
            i++;
            j--;
        }
    }

    // Recurse into the sub-ranges on either side of the partition point.
    if (left < j) qsort_descent_inplace(datas, scores, left, j);
    if (i < right) qsort_descent_inplace(datas, scores, i, right);
}
// Convenience wrapper: descending sort of the full parallel arrays.
// No-op when either array is empty.
template <typename T>
static void qsort_descent(std::vector<T>& datas, std::vector<float>& scores)
{
    if (datas.empty() || scores.empty())
        return;

    qsort_descent_inplace(datas, scores, 0, static_cast<int>(datas.size()) - 1);
}
// Greedy non-maximum suppression over boxes that are already sorted by
// descending score. Kept boxes are appended to picked_boxes, and their
// indices into `bboxes` are appended to `picked` (which is cleared first;
// picked_boxes is appended to, matching the original behavior).
//
// Bug fixed: the original indexed bboxes[0] unconditionally, which is
// undefined behavior when `bboxes` is empty; we now return early.
static void nms_sorted_bboxes(const std::vector<BBoxRect>& bboxes, std::vector<BBoxRect>& picked_boxes,std::vector<size_t>& picked, float nms_threshold)
{
    picked.clear();

    const int n = (int)bboxes.size();
    if (n == 0)
        return; // nothing to suppress

    // Precompute each box's area once.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        const BBoxRect& r = bboxes[i];
        float width = r.xmax - r.xmin;
        float height = r.ymax - r.ymin;
        areas[i] = width * height;
    }

    // The highest-scoring box is always kept.
    picked.push_back(0);
    picked_boxes.push_back(bboxes[0]);

    for (int i = 1; i < n; i++)
    {
        const BBoxRect& a = bboxes[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const BBoxRect& b = bboxes[picked[j]];

            // Weight the intersection by the (>= 1) area ratio so that boxes
            // of very different size are suppressed more aggressively than
            // with plain IoU. (Arithmetic kept as in the original.)
            float ratio=areas[i]/areas[picked[j]];
            ratio=(ratio>1)?ratio:(1.0/(float)ratio);

            float inter_area = intersection_area(a, b)*ratio;
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // suppress when the weighted IoU exceeds the threshold
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                break;
            }
        }

        if (keep)
        {
            picked.push_back(i);
            picked_boxes.push_back(a);
        }
    }
}
// Logistic function: maps any real input into the open interval (0, 1).
static inline float sigmoid(float x)
{
    const float e = exp(-x);
    return 1.f / (1.f + e);
}
int Yolov3DetectionOutput::forward_nhwc(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs,std::vector<BBoxRect>&boxes) const
{
size_t num_class = 80;
size_t NUMS_ANCHOR = 3;
float confidence_threshold=0.6;
float nms_threshold = 0.45f;
size_t scale[3]={32,16,8};
size_t m_[9]={6,7,8,4,5,6,1,2,3};
size_t anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326};
size_t num_mask=3* NUMS_ANCHOR;
size_t len_biases=3* NUMS_ANCHOR*2;
Mat anchors_scale(3,scale);
Mat mask(num_mask,m_);
Mat biases(len_biases,anchors);
std::vector<BBoxRect> all_bbox_rects;
std::vector<float> all_bbox_scores;
for (size_t b = 0; b < bottom_blobs.size(); b++)
{
const Mat& bottom_top_blobs = bottom_blobs[b];
size_t w = bottom_top_blobs.w;
size_t h = bottom_top_blobs.h;
size_t channels = bottom_top_blobs.c;
const size_t channels_per_box = channels / NUMS_ANCHOR;
size_t mask_offset = b * NUMS_ANCHOR;
size_t net_w = (size_t)((reinterpret_cast<size_t *>(anchors_scale.data))[b] * w);
size_t net_h = (size_t)((reinterpret_cast<size_t *>(anchors_scale.data))[b] * h);
std::cout<<"w,h,c"<<w<<":"<<h<<":"<<channels<<std::endl;
std::cout<<"blobs w,h,channels"<<bottom_top_blobs.w<<":"<<bottom_top_blobs.h<<":"<<bottom_top_blobs.c<<std::endl;
if((net_w!=net_h) || (net_w!=416)){
std::cerr<<"check Data Input"<<std::endl;
//return -1;
}
float *p;
#pragma omp parallel for num_threads(opt.num_threads)
for (size_t k = 0; k < h*w; k++)
{
size_t i=k/w;
size_t j=k%w;
for (size_t pp = 0; pp < NUMS_ANCHOR; pp++)
{
size_t biases_index = mask[pp+NUMS_ANCHOR*b];
const float bias_w = (reinterpret_cast<size_t *>(biases.data))[biases_index * 2];
const float bias_h = (reinterpret_cast<size_t *>(biases.data))[biases_index * 2+1] ;
long bias=k*channels+pp*channels_per_box;
&