NMS和soft-NMS

最新推荐文章于 2024-04-05 15:17:52 发布

转载最新推荐文章于 2024-04-05 15:17:52 发布 · 810 阅读

文章标签：

#算法 #数据结构 #目标检测

算法专栏收录该内容

0 篇文章

订阅专栏

本文深入解析了Non-Maximum Suppression (NMS)算法，一种在目标检测中广泛使用的技术，旨在去除重复的边界框，提升检测精度。文章提供了NMS的C++和Python实现代码，并介绍了soft-NMS这一优化变体。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

NMS在目标检测中非常常用，核心原理也很容易理解。自2017年起相继出现了soft-NMS等NMS的变体，优化的核心都是在尽可能去除重复框的同时尽可能保证不漏检。以下是NMS的C++实现：

/*
 * Reorder `indices` so that the values x[indices[0]], x[indices[1]], ...
 * are in descending order.
 *
 *   n       - number of entries in `indices`
 *   x       - values to rank; only read, never modified
 *   indices - index array permuted in place (the caller in this file
 *             fills it with 0..n-1 before calling)
 *
 * Plain O(n^2) exchange sort: only the index entries are swapped.
 */
static void sort(int n, const float* x, int* indices)
{
    int slot;
    int probe;

    for (slot = 0; slot < n; ++slot)
    {
        for (probe = slot + 1; probe < n; ++probe)
        {
            const int held = indices[slot];
            const int candidate = indices[probe];

            /* Exchange only on strictly greater; equal values are not swapped. */
            if (x[candidate] > x[held])
            {
                indices[slot] = candidate;
                indices[probe] = held;
            }
        }
    }
}


/*
 * Greedy non-maximum suppression over axis-aligned boxes.
 *
 * Boxes are visited in descending score order. Every box that is still
 * alive suppresses each lower-ranked box for which
 *
 *     intersection_area / area(lower-ranked box) > overlapThreshold
 *
 * Note that the ratio is taken against the lower-ranked box's own area,
 * not against the union of the two boxes. Widths and heights are computed
 * as (far - near + 1).
 *
 *   numBoxes          - number of input boxes
 *   points            - top-left corner of each box
 *   oppositePoints    - bottom-right corner of each box
 *   score             - score of each box
 *   overlapThreshold  - overlap ratio above which a box is suppressed
 *   numBoxesOut       - receives the number of surviving boxes
 *   pointsOut         - receives a malloc'ed array of surviving top-left corners
 *   oppositePointsOut - receives a malloc'ed array of surviving bottom-right corners
 *   scoreOut          - receives a malloc'ed array of surviving scores
 *
 * Survivors are written in descending score order. The three output arrays
 * are allocated here and are not freed by this function. Allocation results
 * are not checked.
 *
 * Returns LATENT_SVM_OK.
 */
int nonMaximumSuppression(int numBoxes, const CvPoint *points,
                          const CvPoint *oppositePoints, const float *score,
                          float overlapThreshold,
                          int *numBoxesOut, CvPoint **pointsOut,
                          CvPoint **oppositePointsOut, float **scoreOut)
{
    int rank, later, k;
    int kept = 0;
    int written = 0;

    /* Scratch buffers: per-box area, score ranking, suppression flags. */
    float *area = (float *)malloc(numBoxes * sizeof(float));
    int *order = (int *)malloc(numBoxes * sizeof(int));
    int *suppressed = (int *)malloc(numBoxes * sizeof(int));

    for (k = 0; k < numBoxes; k++) {
        order[k] = k;
        suppressed[k] = 0;
        area[k] = (float)((oppositePoints[k].x - points[k].x + 1) *
                          (oppositePoints[k].y - points[k].y + 1));
    }

    /* Rank the boxes by score, best first; order[] holds the ranking. */
    sort(numBoxes, score, order);

    for (rank = 0; rank < numBoxes; rank++) {
        const int winner = order[rank];

        if (suppressed[winner]) {
            continue;
        }

        /* Compare the surviving box against every lower-ranked box. */
        for (later = rank + 1; later < numBoxes; later++) {
            const int rival = order[later];

            if (!suppressed[rival]) {
                /* Intersection rectangle of the two boxes. */
                int left = max(points[winner].x, points[rival].x);
                int right = min(oppositePoints[winner].x, oppositePoints[rival].x);
                int top = max(points[winner].y, points[rival].y);
                int bottom = min(oppositePoints[winner].y, oppositePoints[rival].y);
                int interWidth = right - left + 1;
                int interHeight = bottom - top + 1;

                if (interWidth > 0 && interHeight > 0) {
                    /* Fraction of the lower-ranked box covered by the winner. */
                    float covered = (interWidth * interHeight) / area[rival];

                    if (covered > overlapThreshold) {
                        suppressed[rival] = 1;
                    }
                }
            }
        }
    }

    /* Count the survivors so the output arrays can be sized. */
    for (k = 0; k < numBoxes; k++) {
        if (suppressed[k] == 0) {
            kept++;
        }
    }
    *numBoxesOut = kept;

    *pointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint));
    *oppositePointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint));
    *scoreOut = (float *)malloc((*numBoxesOut) * sizeof(float));

    /* Emit survivors in ranking order (highest score first). */
    for (rank = 0; rank < numBoxes; rank++) {
        const int src = order[rank];

        if (suppressed[src]) {
            continue;
        }
        (*pointsOut)[written].x = points[src].x;
        (*pointsOut)[written].y = points[src].y;
        (*oppositePointsOut)[written].x = oppositePoints[src].x;
        (*oppositePointsOut)[written].y = oppositePoints[src].y;
        (*scoreOut)[written] = score[src];
        written++;
    }

    free(order);
    free(area);
    free(suppressed);

    return LATENT_SVM_OK;
}

以下是python版本的实现

import numpy as np  
  
def py_cpu_nms(dets, thresh):
    """Greedy IoU-based non-maximum suppression (NumPy baseline).

    Columns 0-3 of ``dets`` are read as x1, y1, x2, y2 and column 4 as the
    score. Box widths and heights are computed as ``far - near + 1``.

    Returns the list of row indices into ``dets`` that are kept, ordered
    from highest to lowest score. A box is dropped when its IoU with an
    already kept box is greater than ``thresh``.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))
    areas = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, best score first.
    remaining = scores.argsort()[::-1]
    keep = []

    while remaining.size > 0:
        # The best remaining box is always kept.
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Intersection of the kept box with every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter = (np.maximum(0.0, inter_right - inter_left + 1)
                 * np.maximum(0.0, inter_bottom - inter_top + 1))

        # Intersection over union.
        iou = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap the kept box too much survive.
        remaining = rest[iou <= thresh]

    return keep

以下是soft-NMS的具体实现。由于每次迭代都会改变score，因此取消了预先排序的过程：在两层循环中，外层循环先找到当前剩余框中score最大的框，并将其与外层循环位置上的框交换；内层循环再对其后的框进行score衰减。

def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
    """Soft-NMS that rescores and compacts ``boxes`` in place.

    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and column 4 as the
    score. For each position ``i`` the highest-scoring row at or after ``i``
    is swapped into row ``i``; the score of every later row that overlaps it
    is then multiplied by a weight chosen by ``method``:

      * ``method == 1`` (linear):   ``1 - iou`` if ``iou > Nt`` else ``1``
      * ``method == 2`` (gaussian): ``exp(-(iou * iou) / sigma)``
      * any other value (hard NMS): ``0`` if ``iou > Nt`` else ``1``

    A row whose updated score is below ``threshold`` is overwritten with the
    current last row and the working count ``N`` is decremented.

    ``boxes`` is modified in place (rows reordered, scores rewritten).
    Returns ``[0, 1, ..., N - 1]``, i.e. positions in the reordered array,
    where ``N`` is the number of rows left after discards.
    """
    cdef unsigned int N = boxes.shape[0]
    cdef float iw, ih, box_area
    cdef float ua
    cdef int pos = 0
    cdef float maxscore = 0
    cdef int maxpos = 0
    cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov

    for i in range(N):
        maxscore = boxes[i, 4]
        maxpos = i

        # Remember row i so it can be written into the max box's old slot.
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]

        pos = i + 1
        # Find the highest-scoring row among rows i..N-1.
        while pos < N:
            if maxscore < boxes[pos, 4]:
                maxscore = boxes[pos, 4]
                maxpos = pos
            pos = pos + 1

        # Move the max box into row i (it becomes a detection).
        boxes[i,0] = boxes[maxpos,0]
        boxes[i,1] = boxes[maxpos,1]
        boxes[i,2] = boxes[maxpos,2]
        boxes[i,3] = boxes[maxpos,3]
        boxes[i,4] = boxes[maxpos,4]

        # Complete the swap: the former row i goes to the max box's old position.
        boxes[maxpos,0] = tx1
        boxes[maxpos,1] = ty1
        boxes[maxpos,2] = tx2
        boxes[maxpos,3] = ty2
        boxes[maxpos,4] = ts

        # Reload the t* variables so they now describe the max box in row i.
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]

        pos = i + 1
        # NMS iterations; note that N shrinks when a box falls below threshold.
        while pos < N:
            x1 = boxes[pos, 0]
            y1 = boxes[pos, 1]
            x2 = boxes[pos, 2]
            y2 = boxes[pos, 3]
            # NOTE(review): s is assigned here but never read in this function.
            s = boxes[pos, 4]

            area = (x2 - x1 + 1) * (y2 - y1 + 1)
            # Intersection width, then height; rows that do not overlap are left untouched.
            iw = (min(tx2, x2) - max(tx1, x1) + 1)
            if iw > 0:
                ih = (min(ty2, y2) - max(ty1, y1) + 1)
                if ih > 0:
                    # Union area = area(max box) + area(this box) - intersection.
                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
                    ov = iw * ih / ua # IoU between the max box and this box

                    if method == 1: # linear
                        if ov > Nt: 
                            weight = 1 - ov
                        else:
                            weight = 1
                    elif method == 2: # gaussian
                        weight = np.exp(-(ov * ov)/sigma)
                    else: # original NMS
                        if ov > Nt: 
                            weight = 0
                        else:
                            weight = 1

                    boxes[pos, 4] = weight*boxes[pos, 4]

                    # If the score fell below threshold, discard this row by
                    # overwriting it with the last row and shrinking N. pos is
                    # stepped back so the row just moved in is examined next.
                    if boxes[pos, 4] < threshold:
                        boxes[pos,0] = boxes[N-1, 0]
                        boxes[pos,1] = boxes[N-1, 1]
                        boxes[pos,2] = boxes[N-1, 2]
                        boxes[pos,3] = boxes[N-1, 3]
                        boxes[pos,4] = boxes[N-1, 4]
                        N = N - 1
                        pos = pos - 1

            pos = pos + 1

    # Positions of the surviving rows in the reordered, compacted array.
    keep = [i for i in range(N)]
    return keep