踩了三天坑,看了各种资料:跑官网提供的 C++ demo 和 Python demo 都可以得到正确的结果,但是自己从头走 pt -> onnx -> rknn 的转换流程却总是无法成功。因为习惯先找开源实现(这并不是好的工作思路),于是到 git 上下载了各种版本的代码逐一尝试,结果都出现推理结果不正确的问题。头疼得一度想要放弃,后来看到下面两篇博客,才有了思路,分别为:
1. rknn导出yolov5模型在python中可正常推理,在c++中无结果._rknn 模型推理无法获取结果-CSDN博客
2. Yolov5笔记--RKNN推理部署源码的粗略理解_rknn yolov5-CSDN博客
废话不多说,进入正题,给大家提供一套完整的从训练到部署的使用流程。
训练代码链接,使用官网提供的即可:
GitHub - ultralytics/yolov5: YOLOv5 🚀 in PyTorch > ONNX > CoreML > TFLite
各位需要根据实际需求进行类别修改,以及训练数据存放位置的调整,这里不再赘述。
第一步:训练好的pt 模型转换成onnx ,需要对代码作出修改,修改的位置分别包含两处:
第一处:models/yolo.py 文件中 Detect 类的 forward 函数。
将:
def forward(self, x):
    """Run the detection head on the per-layer feature maps in ``x`` (stock version).

    For each of the ``self.nl`` layers, ``x[i]`` is passed through ``self.m[i]`` and reshaped from
    ``(bs, na * no, ny, nx)`` to ``(bs, na, ny, nx, no)``; ``x`` is updated in place.
    Outside training the predictions are additionally decoded (sigmoid, grid offset,
    stride / anchor scaling) and flattened to ``(bs, na * nx * ny, no)`` before being collected in ``z``.

    Returns:
        ``x`` when ``self.training``; ``(torch.cat(z, 1),)`` when ``self.export``;
        otherwise ``(torch.cat(z, 1), x)``.
    """
    z = []  # inference output
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            # Rebuild the cached grids when shapes are dynamic or the feature-map size changed.
            if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            if isinstance(self, Segment):  # (boxes + masks)
                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
            else:  # Detect (boxes only)
                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))

    return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
修改为:
def forward(self, x):
    """Export variant of the detection head: apply each layer's conv and return the raw maps.

    No reshape, sigmoid, grid/anchor decoding or concatenation happens here, so whatever
    consumes the exported model has to perform those steps itself.

    Args:
        x: mutable sequence with at least ``self.nl`` entries; it is updated in place.

    Returns:
        The same ``x``, with ``x[i]`` replaced by ``self.m[i](x[i])`` for every ``i < self.nl``.
    """
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
    return x
第二处:export.py 文件。
将:
shape = tuple((y[0] if isinstance(y, tuple) else y).shape) # model output shape
改为:
# Index the first output unconditionally instead of going through the isinstance(y, tuple) check.
# NOTE(review): presumably required because the modified Detect.forward returns a list, which the
# tuple check would not unwrap -- confirm against the surrounding export.py code.
shape = tuple(y[0].shape) # model output shape
完成修改后执行:
python export.py --weights yolov5s.pt --img-size 640 --include onnx
第二步:onnx 文件转成 rknn文件
下载代码网址,rockchip官网提供的代码。
GitHub - rockchip-linux/rknn-toolkit2
需要修改的地方两处:
一个是类别数量,在include/postprocess.h里面:
// Number of object classes; 8 is the value used in this walkthrough -- set it to the class count of your own trained model.
#define OBJ_CLASS_NUM 8
一个是src/postprocess.cpp里面process函数改为:
/**
 * Decode one int8-quantized YOLOv5 output branch whose values are raw (pre-sigmoid) logits.
 *
 * The buffer is read as [anchor][channel][grid_y][grid_x] with PROP_BOX_SIZE channels per anchor:
 * x, y, w, h, objectness, followed by OBJ_CLASS_NUM class scores. Three anchors are processed.
 *
 * @param input     quantized output of this branch
 * @param anchor    anchor sizes as consecutive (w, h) pairs, one pair per anchor
 * @param grid_h    number of grid rows
 * @param grid_w    number of grid columns
 * @param height    unused; kept so the signature stays unchanged
 * @param width     unused; kept so the signature stays unchanged
 * @param stride    factor applied to (cell index + offset) to scale the box centre
 *                  (presumably grid cell size in input pixels -- confirm with the caller)
 * @param boxes     output: four floats appended per detection (left, top, width, height)
 * @param objProbs  output: objectness probability * best class probability
 * @param classId   output: index of the best class
 * @param threshold probability threshold applied to objectness (>=) and to the best class (>)
 * @param zp        affine quantization zero point of `input`
 * @param scale     affine quantization scale of `input` (assumed positive)
 * @return number of detections appended to the output vectors
 */
static int process(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
                   std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold,
                   int32_t zp, float scale)
{
    (void)height;
    (void)width;

    const int grid_len = grid_h * grid_w;

    // The tensor stores logits, so the probability threshold must not be quantized and compared
    // directly: that would turn e.g. 0.25 into an effective cut-off of sigmoid(0.25) ~= 0.56.
    // Instead, find the smallest quantized value whose sigmoid reaches `threshold`. Dequantization
    // (scale > 0) and sigmoid are both monotonic, so a single int8 comparison per cell is then exact.
    const int kQuantMin = -128;
    const int kQuantMax = 127;
    int thres_q = kQuantMax + 1; // sentinel: no representable value passes
    for (int q = kQuantMin; q <= kQuantMax; ++q)
    {
        if (sigmoid(deqnt_affine_to_f32(static_cast<int8_t>(q), zp, scale)) >= threshold)
        {
            thres_q = q;
            break;
        }
    }
    if (thres_q > kQuantMax)
    {
        return 0;
    }

    int validCount = 0;
    for (int a = 0; a < 3; a++)
    {
        for (int i = 0; i < grid_h; i++)
        {
            for (int j = 0; j < grid_w; j++)
            {
                // Points at channel 0 (x) of this anchor/cell; channel c lives at in_ptr[c * grid_len].
                int8_t *in_ptr = input + (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;

                const int8_t box_confidence = in_ptr[4 * grid_len];
                if (box_confidence < thres_q)
                {
                    continue;
                }

                // Arg-max over classes can stay in the quantized domain because of monotonicity.
                int8_t maxClassProbs = in_ptr[5 * grid_len];
                int maxClassId = 0;
                for (int k = 1; k < OBJ_CLASS_NUM; ++k)
                {
                    const int8_t prob = in_ptr[(5 + k) * grid_len];
                    if (prob > maxClassProbs)
                    {
                        maxClassId = k;
                        maxClassProbs = prob;
                    }
                }
                if (maxClassProbs <= thres_q)
                {
                    continue;
                }

                // YOLOv5 box decoding, done only for cells that survived both filters.
                float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5;
                float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5;
                float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0;
                float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0;
                box_x = (box_x + j) * (float)stride;
                box_y = (box_y + i) * (float)stride;
                box_w = box_w * box_w * (float)anchor[a * 2];
                box_h = box_h * box_h * (float)anchor[a * 2 + 1];
                // Convert centre coordinates to the top-left corner.
                box_x -= (box_w / 2.0);
                box_y -= (box_h / 2.0);

                objProbs.push_back(sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale)) *
                                   sigmoid(deqnt_affine_to_f32(box_confidence, zp, scale)));
                classId.push_back(maxClassId);
                validCount++;
                boxes.push_back(box_x);
                boxes.push_back(box_y);
                boxes.push_back(box_w);
                boxes.push_back(box_h);
            }
        }
    }
    return validCount;
}