yolov5
模型使用
- 工程
git clone https://github.com/ultralytics/yolov5.git
- 预训练模型
首次运行时,attempt_download函数会自动下载对应的预训练模型。模型的网络结构参数配置在models/*.yaml里。
- yolov5网络组成
Focus + Backbone + Detect
- coco数据集训练
下载coco数据集,放在与yolov5同一目录下。
python3 train.py --data coco.yaml --cfg yolov5s.yaml --weights '' --batch-size 64
训练其它模型时,替换--cfg并相应调整--batch-size:
yolov5m.yaml --batch-size 40
yolov5l.yaml --batch-size 24
yolov5x.yaml --batch-size 16
模型转换部署
PyTorch模型部署时,一般会先转换为ONNX格式。下面介绍yolov5的转换过程。
- Focus部分修改
./models/common.py中
class Focus(nn.Module):
    """Focus width/height information into channel space (2x space-to-depth).

    The input ``x(b, c, h, w)`` is rearranged to ``(b, 4c, h/2, w/2)`` and then
    passed through ``self.conv``.

    The rearrangement is written with ``view``/``permute`` instead of strided
    slicing because slice ops cause problems when the model is converted.
    It must produce exactly the same channel order as the slicing version::

        torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
                   x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)

    i.e. the four channel groups are ordered by ``w_offset * 2 + h_offset``.
    Otherwise weights trained with the slicing version see permuted input
    channels.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # Kept as an attribute for compatibility, but deliberately not used in
        # forward(): the (0, 3, 5, 1, 2, 4) permutation previously copied from
        # it orders the groups by h_offset * 2 + w_offset, which does not match
        # the slicing layout documented above.
        self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,h,w) -> y(b,4c,h/2,w/2)
        """Apply the space-to-depth rearrangement, then ``self.conv``.

        ``h`` and ``w`` must both be even; ``view`` fails otherwise.
        """
        N, C, H, W = x.size()
        s = 2  # down-sampling factor per spatial axis
        # Split each spatial axis into (coarse index, offset within 2x2 cell).
        x = x.view(N, C, H // s, s, W // s, s)  # e.g. x(1,64,40,2,40,2)
        # Bring (w_offset, h_offset) in front of the channel axis, in that
        # order, so the flattened channel index is (w_off * 2 + h_off) * C + c.
        x = x.permute(0, 5, 3, 1, 2, 4).contiguous()  # e.g. x(1,2,2,64,40,40)
        x = x.view(N, C * s * s, H // s, W // s)  # e.g. x(1,256,40,40)
        return self.conv(x)
- Detect部分修改
./models/yolo.py中
class Detect(nn.Module):
stride = None # strides computed during build
export = True # onnx export 此处改为True
.......
.......
.......
def forward(self, x):
    """Run the detection head over the per-layer feature maps in ``x``.

    ``x`` is a list with one feature map per detection layer
    (``self.nl`` entries); its entries are replaced in place.

    Returns:
        * ``self.export`` set: the list ``x`` of raw conv outputs, each
          ``(bs, na * no, ny, nx)`` (e.g. ``(bs, 255, ny, nx)``). The
          reshape, permute and box decoding are deliberately left out so
          they do not become part of the converted model; the deployment
          code has to perform them.
        * training (export off): the list ``x`` with every entry reshaped
          to ``(bs, na, ny, nx, no)``.
        * inference (export off): ``(torch.cat(z, 1), x)`` where ``z``
          holds the decoded predictions ``(bs, na * ny * nx, no)`` per layer.
    """
    exporting = self.export
    # Export is treated like training (no decoding). A local flag is used so
    # that a forward pass never flips ``self.training`` as a side effect.
    training = self.training or exporting
    z = []  # inference output
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv, x(bs,255,ny,nx)
        if exporting:
            # Keep the raw conv output for conversion.
            continue

        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not training:  # inference
            # Rebuild the cached grid when the feature-map size changed.
            if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

            y = x[i].sigmoid()
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.view(bs, -1, self.no))

    return x if training else (torch.cat(z, 1), x)
c++部署,后处理部分代码
// Apply the logistic function 1 / (1 + e^-v) in place to the first `length`
// values of out->data, which is read and written as an array of float.
void sigmoid(Mat *out, int length)
{
    float *const values = reinterpret_cast<float *>(out->data);
    for (int idx = 0; idx < length; ++idx)
    {
        const float v = values[idx];
        values[idx] = 1.0 / (1 + expf(-v));
    }
}
// Generate proposals: decode every network output in `outs` into candidate
// boxes (in frame coordinates), their class ids and their confidences.
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
// Scale factors from network-input size to the size of `frame`.
float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
// nout = values per anchor: 4 box parameters + 1 objectness + one score per class.
int n = 0, q = 0, i = 0, j = 0, nout = this->classes.size() + 5, c = 0;
for (n = 0; n < (int)outs.size(); n++) // one pass per output feature map
{
    // feature map size
    int num_grid_x = (int)(this->inpWidth / this->stride_[n]);
    int num_grid_y = (int)(this->inpHeight / this->stride_[n]);
    // feature map area (number of grid cells)
    int area = num_grid_x * num_grid_y;
    // Apply the logistic function to all raw predictions of this feature map:
    // three anchors per grid cell, each with four box parameters, one
    // objectness value and the per-class confidences.
    // NOTE(review): assumes outs[n] holds at least 3 * nout * area floats laid
    // out as [anchor][value][grid_y][grid_x], as the indexing below implies -
    // confirm against the exported model.
    this->sigmoid(&outs[n], 3 * nout * area);
    for (q = 0; q < 3; q++) // one pass per anchor
    {
        // anchors_[n] stores (width, height) pairs, one pair per anchor.
        const float anchor_w = this->anchors_[n][q * 2];
        const float anchor_h = this->anchors_[n][q * 2 + 1];
        // Start of the nout planes (each `area` floats) belonging to anchor q.
        float *pdata = (float *)outs[n].data + q * nout * area;
        for (i = 0; i < num_grid_y; i++)
        {
            for (j = 0; j < num_grid_x; j++)
            {
                // Plane 4 holds the objectness score of cell (i, j).
                float box_score = pdata[4 * area + i * num_grid_x + j];
                if (box_score > this->objThreshold)
                {
                    float max_class_socre = 0, class_socre = 0;
                    int max_class_id = 0;
                    for (c = 0; c < this->classes.size(); c++) // find the best class score
                    {
                        // Planes 5.. hold the class scores; weight them by objectness.
                        class_socre = pdata[(c + 5) * area + i * num_grid_x + j]*box_score;
                        if (class_socre > max_class_socre)
                        {
                            max_class_socre = class_socre;
                            max_class_id = c;
                        }
                    }
                    if (max_class_socre > this->confThreshold)
                    {
                        // Decode centre and size in network-input pixels.
                        float cx = (pdata[i * num_grid_x + j] * 2.f - 0.5f + j) * this->stride_[n]; // centre x
                        float cy = (pdata[area + i * num_grid_x + j] * 2.f - 0.5f + i) * this->stride_[n]; // centre y
                        float w = powf(pdata[2 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_w; // width
                        float h = powf(pdata[3 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_h; // height
                        // Convert to a top-left corner and rescale to frame coordinates.
                        int left = (cx - 0.5 * w) * ratiow;
                        int top = (cy - 0.5 * h) * ratioh;
                        classIds.push_back(max_class_id);
                        confidences.push_back(max_class_socre);
                        boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));
                    }
                }
            }
        }
    }
}
// Perform non maximum suppression to eliminate redundant overlapping boxes with
// lower confidences; `indices` receives the positions in `boxes` that are kept.
vector<int> indices;
NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
    int idx = indices[i];
    Rect box = boxes[idx];
    // Skip boxes that are too small or whose width/height ratio is outside [0.2, 5].
    if (box.area() < min_area || float(box.width) / box.height > 5 || float(box.width) / box.height < 0.2)
    {
        continue;
    }
    // Clip the box to the frame. The right/bottom edges are taken before the
    // origin is clamped, so a box starting left of / above the frame shrinks
    // instead of being shifted past its true right/bottom edge.
    const int right = box.x + box.width;
    const int bottom = box.y + box.height;
    box.x = box.x > 0 ? box.x : 0;
    box.y = box.y > 0 ? box.y : 0;
    // As before, the right/bottom edge is limited to frame.cols - 1 / frame.rows - 1.
    box.width = (right < frame.cols ? right : frame.cols - 1) - box.x;
    box.height = (bottom < frame.rows ? bottom : frame.rows - 1) - box.y;
    // NOTE(review): the clipped `box` is not consumed within this excerpt;
    // presumably the drawing/reporting code that follows was elided.
}