目标检测：IoU（交并比）和 NMS（非极大值抑制）、Soft_NMS方法Python实现

原创已于 2022-08-28 13:04:01 修改 · 642 阅读

3 ·

CC 4.0 BY-SA版权

文章标签：

#算法 #python #计算机视觉

于 2022-03-05 15:39:03 首次发布

Python 同时被 3 个专栏收录

82 篇文章

订阅专栏

计算机视觉

33 篇文章

订阅专栏

目标检测

24 篇文章

订阅专栏

本文详细介绍了目标检测任务中的关键概念——交并比（IOU）和非极大值抑制（NMS）。通过Python代码展示了如何计算IOU、多维IOU以及GIoU，并解释了NMS的工作原理和实现步骤。此外，还提及了Soft_NMS作为NMS的一种优化策略。

部署运行你感兴趣的模型镜像

交并比（Intersection over Union，IoU）和非极大值抑制（Non-Maximum Suppression，NMS）是目标检测任务中非常重要的两个概念。例如在用训练好的模型进行测试时，网络会预测出一系列的候选框。这时候我们会用NMS来移除一些多余的候选框，即移除与得分更高的候选框的IOU值大于某个阈值的框。然后在剩下的候选框中，分别计算与ground truth的IOU值，通常会规定当候选框和ground truth的IOU值大于0.5时，认为检测正确。下面我们分别用python实现IOU和NMS。

交并比IOU

如上图所示，IOU值定义为两个矩形框交集面积与并集面积的比值。即：
$IoU=\frac{|A \cap B|}{|A \cup B|}$

python代码实现

import numpy as np
def compute_iou(box1, box2, wh=False):
    """Compute the IoU (intersection over union) of two axis-aligned boxes.

    Coordinates are treated as continuous: a box's width is ``xmax - xmin``
    for both the areas and the intersection, so boxes that merely share an
    edge or a corner have zero overlap.

    Args:
        box1, box2: ``[xmin, ymin, xmax, ymax]`` when ``wh`` is false, or
            ``[xcenter, ycenter, w, h]`` when ``wh`` is true.
        wh: Selects the coordinate format used by both boxes.

    Returns:
        The IoU of ``box1`` and ``box2``. A small epsilon (1e-6) is added to
        the union so that two zero-area boxes yield 0 instead of raising a
        division-by-zero error.
    """
    if wh:
        # Convert centre/size to corners without rounding, so fractional
        # (e.g. normalised) coordinates keep their precision.
        xmin1, ymin1 = box1[0] - box1[2] / 2.0, box1[1] - box1[3] / 2.0
        xmax1, ymax1 = box1[0] + box1[2] / 2.0, box1[1] + box1[3] / 2.0
        xmin2, ymin2 = box2[0] - box2[2] / 2.0, box2[1] - box2[3] / 2.0
        xmax2, ymax2 = box2[0] + box2[2] / 2.0, box2[1] + box2[3] / 2.0
    else:
        xmin1, ymin1, xmax1, ymax1 = box1
        xmin2, ymin2, xmax2, ymax2 = box2

    # Areas of the two boxes.
    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)

    # Top-left and bottom-right corners of the intersection rectangle.
    xx1 = max(xmin1, xmin2)
    yy1 = max(ymin1, ymin2)
    xx2 = min(xmax1, xmax2)
    yy2 = min(ymax1, ymax2)

    # Clamp at 0 so disjoint boxes contribute no intersection. The same
    # width convention as the areas is used (no "+1"), otherwise the ratio
    # is inflated.
    inter_area = max(0, xx2 - xx1) * max(0, yy2 - yy1)
    iou = inter_area / (area1 + area2 - inter_area + 1e-6)

    return iou

多维iou

def Ious(bbox, gt):
    """Compute the pairwise IoU matrix between two sets of boxes.

    Each row is ``[xmin, ymin, xmax, ymax]``. The computation relies on NumPy
    broadcasting, which aligns shapes from the trailing axis backwards and
    repeats any axis of length 1: ``bbox[:, None, :2]`` has shape (n, 1, 2)
    and ``gt[:, :2]`` has shape (m, 2), so combining them yields (n, m, 2).

    :param bbox: array-like of shape (n, 4), n > 0
    :param gt: array-like of shape (m, 4), m > 0
    :return: array of shape (n, m); entry (i, j) is the IoU of ``bbox[i]``
        and ``gt[j]``. Pairs whose union area is not positive yield 0.
    """
    bbox = np.asarray(bbox)
    gt = np.asarray(gt)
    assert bbox.shape[0] > 0, 'bbox len must>0'
    assert gt.shape[0] > 0, 'gt len must>0'

    lt = np.maximum(bbox[:, None, :2], gt[:, :2])  # left_top (x, y)
    rb = np.minimum(bbox[:, None, 2:], gt[:, 2:])  # right_bottom (x, y)
    wh = np.maximum(rb - lt, 0)  # intersection (w, h), clamped for disjoint boxes
    inter_areas = wh[:, :, 0] * wh[:, :, 1]  # shape: (n, m)
    box_areas = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
    gt_areas = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    union_areas = box_areas[:, None] + gt_areas - inter_areas

    # Degenerate (zero-area) pairs would give 0/0; silence the warning and
    # report an IoU of 0 for them instead of NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        IoU = inter_areas / union_areas
    return np.where(union_areas > 0, IoU, 0.0)

GIoU

def GIoU(box1, box2):
    """Compute the Generalized IoU (GIoU) of two axis-aligned boxes.

    GIoU = IoU - (C - U) / C, where U is the union area and C is the area of
    the smallest box enclosing both inputs. Widths and heights use the
    inclusive-pixel convention (``xmax - xmin + 1``) throughout.

    Args:
        box1, box2: ``[xmin, ymin, xmax, ymax]``.
            NOTE(review): assumes xmax >= xmin and ymax >= ymin; inverted
            boxes are not validated here.

    Returns:
        The GIoU value: 1 for identical boxes, negative for boxes that are
        far apart relative to their size.
    """
    xmin1, ymin1, xmax1, ymax1 = box1
    xmin2, ymin2, xmax2, ymax2 = box2

    area1 = (xmax1 - xmin1 + 1) * (ymax1 - ymin1 + 1)
    area2 = (xmax2 - xmin2 + 1) * (ymax2 - ymin2 + 1)

    # Intersection rectangle, clamped to 0 when the boxes are disjoint.
    xx1 = max(xmin1, xmin2)
    yy1 = max(ymin1, ymin2)
    xx2 = min(xmax1, xmax2)
    yy2 = min(ymax1, ymax2)

    A_B = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    AUB = area1 + area2 - A_B

    iou = A_B / AUB

    # Smallest enclosing box of the two inputs.
    xxx1 = min(xmin1, xmin2)
    yyy1 = min(ymin1, ymin2)
    xxx2 = max(xmax1, xmax2)
    yyy2 = max(ymax1, ymax2)

    g_area = max(0, xxx2 - xxx1 + 1) * max(0, yyy2 - yyy1 + 1)
    # Penalty: fraction of the enclosing box not covered by the union.
    g_iou_add = (g_area - AUB) / g_area
    return iou - g_iou_add

非极大值抑制（NMS）

NMS的算法步骤如下：

# INPUT：所有预测出的bounding box (bbx)信息（坐标和置信度confidence），　IOU阈值（大于该阈值的bbx将被移除）
for object in all objects:
	(1) 获取当前目标类别下所有bbx的信息
	(2) 将bbx按照confidence从高到低排序,并记录当前confidence最大的bbx
	(3) 计算最大confidence对应的bbx与剩下所有bbx的IOU，移除其中IOU大于阈值的bbx
	(4) 对剩下的bbx，循环执行(2)和(3)直到所有的bbx均满足要求（即不能再移除bbx）

需要注意的是，NMS是对所有的类别分别执行的。举个例子，假设最后预测出的矩形框有2类（分别为cup, pen）,在NMS之前，每个类别可能都会有不只一个bbx被预测出来，这个时候我们需要对这两个类别分别执行一次NMS过程。

def nms(boxes, thresh):
    """Run hard non-maximum suppression on a set of scored boxes.

    The highest-scoring remaining box is repeatedly selected, and every other
    remaining box whose IoU with it is not below ``thresh`` is discarded.
    Areas use the inclusive-pixel convention (``x2 - x1 + 1``).

    Args:
        boxes: array of shape (n, 5) with rows ``[x1, y1, x2, y2, score]``.
        thresh: IoU threshold; a box survives a round only if its IoU with
            the selected box is strictly less than this value.

    Returns:
        List of row indices into ``boxes`` for the kept boxes, ordered from
        highest to lowest score.
    """
    # Per-box coordinates, scores and areas.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # argsort is ascending, so the best remaining box is always order[-1].
    order = np.argsort(scores)

    keep = []
    while order.size > 0:
        # Select the highest-scoring remaining box.
        index = order[-1]
        keep.append(int(index))
        rest = order[:-1]

        # Intersection of the selected box with every other remaining box.
        x11 = np.maximum(x1[index], x1[rest])
        y11 = np.maximum(y1[index], y1[rest])
        x22 = np.minimum(x2[index], x2[rest])
        y22 = np.minimum(y2[index], y2[rest])
        w = np.maximum(0.0, x22 - x11 + 1)
        h = np.maximum(0.0, y22 - y11 + 1)
        intersection = w * h

        # Drop the selected box and every box overlapping it too much.
        ious = intersection / (areas[index] + areas[rest] - intersection)
        order = rest[ious < thresh]
    return keep

或者

def nms(dets, thresh=0.35):
    """Greedy non-maximum suppression over scored boxes.

    ``dets`` is indexed as rows of ``[x1, y1, x2, y2, score]``. Boxes are
    visited from highest to lowest score; each visited box is kept and every
    remaining box whose IoU with it exceeds ``thresh`` is dropped. Areas use
    the inclusive-pixel convention (``x2 - x1 + 1``).

    Returns the list of kept row indices, best score first.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    box_area = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, highest score first.
    ranking = np.argsort(conf)[::-1]

    keep = []
    while ranking.size:
        best, rest = ranking[0], ranking[1:]
        keep.append(best)

        # Overlap of the current best box with each remaining candidate.
        overlap_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1,
        )
        overlap_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1,
        )
        overlap = overlap_w * overlap_h
        iou = overlap / (box_area[best] + box_area[rest] - overlap)

        # Only candidates that do not overlap too much survive this round.
        ranking = rest[iou <= thresh]
    return keep

Soft_NMS

def soft_nms(dets, iou_thresh=0.3, sigma=0.5, thresh=0.5, method=2):
    """Soft-NMS: decay the scores of overlapping boxes instead of deleting them.

    ``dets`` is modified in place. At step ``i`` the highest-scoring row among
    rows ``i..N-1`` is swapped into position ``i``, and the scores (column 4)
    of the rows after it are multiplied by a weight that depends on their IoU
    with row ``i``. The returned indices therefore refer to the rows of
    ``dets`` as they are AFTER the call. Areas use the inclusive-pixel
    convention (``x2 - x1 + 1``).

    Args:
        dets: array with rows ``[x1, y1, x2, y2, score]``.
            NOTE(review): presumably a float array; with an integer dtype the
            decayed scores would be truncated on assignment.
        iou_thresh: IoU above which a box is penalised by method 1 and by
            hard NMS; not used by method 2.
        sigma: width parameter of the Gaussian penalty (method 2).
        thresh: rows whose final score is greater than this are kept.
        method: 1 = linear penalty (``1 - iou`` above ``iou_thresh``),
            2 = Gaussian penalty (``exp(-iou**2 / sigma)``),
            any other value = hard NMS (weight 0 above ``iou_thresh``).

    Returns:
        1-D array of row indices into the reordered ``dets`` whose score
        exceeds ``thresh``.
    """
    N = dets.shape[0]
    areas = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)

    for i in range(N):
        pos = i + 1

        if pos < N:
            # Bring the best remaining detection to row i.
            best = pos + np.argmax(dets[pos:, 4])
            if dets[i, 4] < dets[best, 4]:
                # Fancy indexing copies the right-hand side, so this is a true
                # swap; holding a basic slice of row i as the temporary would
                # alias dets and lose the original row. Whole rows are swapped
                # so any extra columns stay attached to their box.
                dets[[i, best]] = dets[[best, i]]
                areas[[i, best]] = areas[[best, i]]

        # IoU of row i with every row after it.
        xx1 = np.maximum(dets[i, 0], dets[pos:, 0])
        yy1 = np.maximum(dets[i, 1], dets[pos:, 1])
        xx2 = np.minimum(dets[i, 2], dets[pos:, 2])
        yy2 = np.minimum(dets[i, 3], dets[pos:, 3])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h
        ious = inter / (areas[i] + areas[pos:] - inter)

        if method == 1:
            # Linear decay for boxes overlapping more than iou_thresh.
            weight = np.where(ious > iou_thresh, 1.0 - ious, 1.0)
        elif method == 2:
            # Gaussian decay applied to every box.
            weight = np.exp(-ious * ious / sigma)
        else:
            # Hard NMS: zero the score of boxes overlapping too much.
            weight = np.where(ious > iou_thresh, 0.0, 1.0)

        dets[pos:, 4] = dets[pos:, 4] * weight

    keep = np.where(dets[:, 4] > thresh)[0]
    return keep