首先分享两个网络结构可视化工具。Netscope:打开下面的网页,将 Caffe 的 .prototxt 文件拖到左侧空白区域即可。
http://ethereon.github.io/netscope/#/editor
Netron 支持主流各种框架的模型结构可视化,下面给出 GitHub 链接:
https://github.com/lutzroeder/Netron
YOLOv3 在性能上做过优化,用来做人脸和其它目标识别效果非常好。YOLOv3 在 Caffe 等框架下都支持训练,但在部署时需要自己编写推理(检测)部分。检测部分的代码网上资料较少,这里给出一个框架,目标是将 YOLOv3 移植到嵌入式 C++ 平台。下面直接给出检测部分,希望能帮到大家。
// specific language governing permissions and limitations under the License.
#include <opencv2/opencv.hpp>
#include "detection.h"
#include <fstream>
#include <iostream>
#include <algorithm>
#include <math.h>
#include <string>
#include "cpu.h"
namespace mace {
namespace yolov3{
// The 80 COCO class names, in the network's training order; index with the
// class index produced by the detection head.
std::string coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};
// Maps the contiguous class index (0..79) to the official COCO category id;
// the official numbering has gaps (e.g. 12, 26, 29-30 are absent), hence the skips.
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
// Construct an Option with the library defaults: lightweight mode on, a fixed
// pool of 4 worker threads, and no custom allocators.
Option::Option()
    : lightmode(true),
      // NOTE: a dynamic value via get_cpu_count() was tried and left disabled;
      // the thread count is currently pinned to 4.
      num_threads(4),
      blob_allocator(0),
      workspace_allocator(0)
{
}
// Process-wide default configuration; replaced via set_default_option().
static Option g_default_option;
// Returns a read-only reference to the shared default Option.
// NOTE(review): not synchronized — presumably configured once at startup
// before worker threads run; confirm against callers.
const Option& get_default_option()
{
return g_default_option;
}
// Installs `opt` as the process-wide default configuration.
// Returns 0 on success, -1 (with a message on stderr) if the option is
// rejected because its thread count is not positive.
int set_default_option(const Option& opt)
{
    const bool thread_count_valid = opt.num_threads > 0;
    if (!thread_count_valid)
    {
        fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
        return -1;
    }

    g_default_option = opt;
    return 0;
}
// Default-construct the detection-output layer; all detection parameters
// (thresholds, anchors, strides) are currently hard-coded in forward_nhwc().
Yolov3DetectionOutput::Yolov3DetectionOutput()
{
}
// Destructor: the layer owns no resources; the softmax cleanup below was
// intentionally left disabled.
Yolov3DetectionOutput::~Yolov3DetectionOutput()
{
//delete softmax;
}
int label;
// Area of the overlap between two axis-aligned boxes; 0 when they are
// disjoint or merely touch along an edge.
static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)
{
    const float overlap_w = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
    const float overlap_h = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);

    // A non-positive extent on either axis means no shared area.
    if (overlap_w <= 0.f || overlap_h <= 0.f)
        return 0.f;

    return overlap_w * overlap_h;
}
// Sort scores[left..right] into descending order in place, applying the same
// permutation to datas so the two arrays stay paired.
//
// Bug fixed: the previous partition used `while (i < j)` with ad-hoc
// `if(left==middle)break;` / `if(i==j){i++;j--;}` escapes. For a two-element
// range that is already descending (e.g. scores {2,1}) the break left `j` on
// an element smaller than the pivot and the unconditional swap inverted the
// pair, producing ascending output. This is the standard Hoare partition,
// which handles those ranges correctly.
template <typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores, int left, int right)
{
    int i = left;
    int j = right;
    const float pivot = scores[(left + right) / 2];

    while (i <= j)
    {
        // Advance past elements already on the correct side of the pivot.
        while (scores[i] > pivot)
            i++;
        while (scores[j] < pivot)
            j--;

        if (i <= j)
        {
            // Swap the out-of-place pair, keeping datas aligned with scores.
            std::swap(datas[i], datas[j]);
            std::swap(scores[i], scores[j]);
            i++;
            j--;
        }
    }

    // Recurse into the sub-ranges on either side of the partition point.
    if (left < j) qsort_descent_inplace(datas, scores, left, j);
    if (i < right) qsort_descent_inplace(datas, scores, i, right);
}
// Convenience wrapper: descending sort of the full parallel arrays.
// No-op when either array is empty.
template <typename T>
static void qsort_descent(std::vector<T>& datas, std::vector<float>& scores)
{
    if (datas.empty() || scores.empty())
        return;

    qsort_descent_inplace(datas, scores, 0, static_cast<int>(datas.size()) - 1);
}
// Greedy non-maximum suppression over boxes that are already sorted by
// descending score. Kept boxes are appended to picked_boxes, and their
// indices into `bboxes` are appended to `picked` (which is cleared first;
// picked_boxes is appended to, matching the original behavior).
//
// Bug fixed: the original indexed bboxes[0] unconditionally, which is
// undefined behavior when `bboxes` is empty; we now return early.
static void nms_sorted_bboxes(const std::vector<BBoxRect>& bboxes, std::vector<BBoxRect>& picked_boxes,std::vector<size_t>& picked, float nms_threshold)
{
    picked.clear();

    const int n = (int)bboxes.size();
    if (n == 0)
        return; // nothing to suppress

    // Precompute each box's area once.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        const BBoxRect& r = bboxes[i];
        float width = r.xmax - r.xmin;
        float height = r.ymax - r.ymin;
        areas[i] = width * height;
    }

    // The highest-scoring box is always kept.
    picked.push_back(0);
    picked_boxes.push_back(bboxes[0]);

    for (int i = 1; i < n; i++)
    {
        const BBoxRect& a = bboxes[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const BBoxRect& b = bboxes[picked[j]];

            // Weight the intersection by the (>= 1) area ratio so that boxes
            // of very different size are suppressed more aggressively than
            // with plain IoU. (Arithmetic kept as in the original.)
            float ratio=areas[i]/areas[picked[j]];
            ratio=(ratio>1)?ratio:(1.0/(float)ratio);

            float inter_area = intersection_area(a, b)*ratio;
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // suppress when the weighted IoU exceeds the threshold
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                break;
            }
        }

        if (keep)
        {
            picked.push_back(i);
            picked_boxes.push_back(a);
        }
    }
}
// Logistic function: maps any real input into the open interval (0, 1).
static inline float sigmoid(float x)
{
    const float e = exp(-x);
    return 1.f / (1.f + e);
}
int Yolov3DetectionOutput::forward_nhwc(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs,std::vector<BBoxRect>&boxes) const
{
size_t num_class = 80;
size_t NUMS_ANCHOR = 3;
float confidence_threshold=0.6;
float nms_threshold = 0.45f;
size_t scale[3]={32,16,8};
size_t m_[9]={6,7,8,4,5,6,1,2,3};
size_t anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326};
size_t num_mask=3* NUMS_ANCHOR;
size_t len_biases=3* NUMS_ANCHOR*2;
Mat anchors_scale(3,scale);
Mat mask(num_mask,m_);
Mat biases(len_biases,anchors);
std::vector<BBoxRect> all_bbox_rects;
std::vector<float> all_bbox_scores;
for (size_t b = 0; b < bottom_blobs.size(); b++)
{
const Mat& bottom_top_blobs = bottom_blobs[b];
size_t w = bottom_top_blobs.w;
size_t h = bottom_top_blobs.h;
size_t channels = bottom_top_blobs.c;
const size_t channels_per_box = channels / NUMS_ANCHOR;
size_t mask_offset = b * NUMS_ANCHOR;
size_t net_w = (size_t)((reinterpret_cast<size_t *>(anchors_scale.data))[b] * w);
size_t net_h = (size_t)((reinterpret_cast<size_t *>(anchors_scale.data))[b] * h);
std::cout<<"w,h,c"<<w<<":"<<h<<":"<<channels<<std::endl;
std::cout<<"blobs w,h,channels"<<bottom_top_blobs.w<<":"<<bottom_top_blobs.h<<":"<<bottom_top_blobs.c<<std::endl;
if((net_w!=net_h) || (net_w!=416)){
std::cerr<<"check Data Input"<<std::endl;
//return -1;
}
float *p;
#pragma omp parallel for num_threads(opt.num_threads)
for (size_t k = 0; k < h*w; k++)
{
size_t i=k/w;
size_t j=k%w;
for (size_t pp = 0; pp < NUMS_ANCHOR; pp++)
{
size_t biases_index = mask[pp+NUMS_ANCHOR*b];
const float bias_w = (reinterpret_cast<size_t *>(biases.data))[biases_index * 2];
const float bias_h = (reinterpret_cast<size_t *>(biases.data))[biases_index * 2+1] ;
long bias=k*channels+pp*channels_per_box;
&