1.背景
在多类别目标检测中,通常只需要在同一类别内部做NMS,不同类别之间不做NMS。官方的yolov5在用pt模型推理的代码里已经实现了一版:
实现代码在utils/general.py
# Class-aware NMS: column 5 of x (sliced as 5:6) is multiplied by max_wh, or by
# 0 when agnostic is truthy, and the result is added to the four box columns
# before a single NMS call.
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
agnostic参数:True表示所有类别放在一起计算NMS,False表示按类别分别计算NMS。
代码的重点在“+ c”,这里的c就是偏移量:每个框的坐标都加上“类IDX × max_wh”,不同类别的框被平移到互不重叠的区域,所以只调用一次NMS,也只会抑制同一类别内的框。
x[:, :4]表示box(从二维看第0,1,2,3列)
x[:, 4] 表示分数(从二维看第4列)
x[:, 5:6]表示类IDX(从二维看第5列)
max_wh这里是4096(像素),需要大于框坐标可能出现的最大值;这样偏移量仅取决于类IDX,并且足够大,不同类别的框加上偏移后不会相互重叠。
2.基于onnx推理
基于onnx推理时同样通过agnostic=False实现,原理和上面基于pt推理的一样,也需要加偏移量,核心代码如下:
# Batched NMS: the class column of x (5:6) times max_wh (or 0 when agnostic is
# truthy) is added to the four box columns, then one nms() call handles all
# classes together.
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = nms(boxes, scores, iou_thres) # NMS
完整代码如下:
"""
Pre-processing and post-processing helpers for detection.
"""
import time
import cv2
import numpy as np
import logging
import onnxruntime
import copy
import os
# Five colour triples; presumably used when drawing detections (channel order
# depends on the drawing code, which is not visible here -- confirm).
colors = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (255, 255, 0),
    (0, 255, 255),
]
def cv2_imread(path):
    """Read the file at ``path`` and return the image decoded by OpenCV.

    The raw bytes are loaded with ``np.fromfile`` and decoded in memory with
    ``cv2.imdecode`` rather than going through ``cv2.imread``.
    NOTE(review): presumably done so that non-ASCII paths work -- confirm.
    """
    raw_bytes = np.fromfile(path, dtype=np.uint8)
    # Decode flag 1 is passed unchanged (cv2.IMREAD_COLOR per the OpenCV docs).
    decoded = cv2.imdecode(raw_bytes, 1)
    return decoded
def cv2_imwrite(image, image_path, type='jpg'):
    """Encode ``image`` in memory and write the encoded bytes to ``image_path``.

    ``type`` is the file extension without the leading dot (default ``'jpg'``);
    it is passed to ``cv2.imencode`` as ``'.<type>'``.
    """
    extension = '.{}'.format(type)
    # imencode returns a pair; only element 1 (the encoded buffer) is used and
    # element 0 is not inspected.
    encoded = cv2.imencode(extension, image)[1]
    encoded.tofile(image_path)
def my_letter_box(img, size=(640, 640)):
    """Resize ``img`` to fit inside ``size`` keeping its aspect ratio, then pad.

    Args:
        img: image array whose first two dimensions are height and width.
            Both colour (H, W, C) and grayscale (H, W) arrays are accepted.
        size: target ``(height, width)`` of the padded output.

    Returns:
        A tuple ``(padded, r, (left, top))``: ``padded`` is the resized image
        placed on a constant (128, 128, 128) border so that it has the target
        size, ``r`` is the scale factor applied to both axes, and
        ``(left, top)`` is the border width in pixels added on the left and
        top edges.
    """
    # Only height and width are needed; slicing (instead of a 3-way unpack)
    # also accepts 2-D grayscale input and drops the unused channel count.
    h, w = img.shape[:2]
    target_h, target_w = size[0], size[1]

    # A single scale for both axes keeps the aspect ratio.
    r = min(target_h / h, target_w / w)

    # int() truncates; clamp to 1 px so a very elongated image never asks
    # cv2.resize for a zero-sized output.
    new_h = max(1, int(h * r))
    new_w = max(1, int(w * r))

    # Split the leftover space so the image is centred; any odd pixel goes to
    # the bottom / right side.
    top = int((target_h - new_h) / 2)
    left = int((target_w - new_w) / 2)
    bottom = target_h - new_h - top
    right = target_w - new_w - left

    # cv2.resize takes (width, height); the default interpolation is used.
    img_resize = cv2.resize(img, (new_w, new_h))
    img = cv2.copyMakeBorder(img_resize, top, bottom, left, right,
                             borderType=cv2.BORDER_CONSTANT,
                             value=(128, 128, 128))
    return img, r, (left, top)
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding<