算法原文:SSD: Single Shot MultiBox Detector
github链接:https://github.com/pierluigiferrari/ssd_keras#overview
以ssd7作为主程序一共如下几类:
- train_ssd7
- keras_ssd7
- keras_ssd_loss
- ssd_box_encode_decode_utils
- ssd_batch_generator
ssd_box_encode_decode_utils
代码主要作用:
- 对生成的default box(默认框)计算IOU(交并比)
- 对其进行非极大抑制
- 将其做成model对SSD进行输出
- 将其做成model提供给SSD主程序使用
def iou(boxes1, boxes2, coords='centroids'):
    """Compute the element-wise intersection-over-union (IoU) of two box sets.

    Each argument is either a single box (rank 1, four coordinates) or a
    batch of boxes (rank 2, one box per row). Rank-1 inputs are promoted to
    a one-row batch, so the result is always a 1-D array; the two batches
    are combined by NumPy broadcasting.

    Arguments:
        boxes1: Array of shape `(4,)` or `(n, 4)`.
        boxes2: Array of shape `(4,)` or `(n, 4)`.
        coords: `'centroids'` if the boxes are `(cx, cy, w, h)`, or
            `'minmax'` if they are `(xmin, xmax, ymin, ymax)`.

    Returns:
        A 1-D array holding the IoU of each pair of boxes.

    Raises:
        ValueError: If an input has rank greater than 2, if the last axis
            does not have exactly four entries, or if `coords` is not one
            of the supported values.
    """
    rank1 = len(boxes1.shape)
    if rank1 > 2:
        raise ValueError("boxes1 must have rank either 1 or 2, but has rank {}.".format(rank1))
    rank2 = len(boxes2.shape)
    if rank2 > 2:
        raise ValueError("boxes2 must have rank either 1 or 2, but has rank {}.".format(rank2))

    # Promote a single box to a batch containing one box.
    if rank1 == 1:
        boxes1 = np.expand_dims(boxes1, axis=0)
    if rank2 == 1:
        boxes2 = np.expand_dims(boxes2, axis=0)

    # Every box must carry exactly four coordinates.
    width1, width2 = boxes1.shape[1], boxes2.shape[1]
    if width1 != 4 or width2 != 4:
        raise ValueError("It must be boxes1.shape[1] == boxes2.shape[1] == 4, but it is boxes1.shape[1] == {}, boxes2.shape[1] == {}.".format(width1, width2))

    if coords not in ('centroids', 'minmax'):
        raise ValueError("Unexpected value for `coords`. Supported values are 'minmax' and 'centroids'.")
    if coords == 'centroids':
        # The overlap arithmetic below works on (xmin, xmax, ymin, ymax).
        boxes1 = convert_coordinates(boxes1, start_index=0, conversion='centroids2minmax')
        boxes2 = convert_coordinates(boxes2, start_index=0, conversion='centroids2minmax')

    xmin1, xmax1, ymin1, ymax1 = boxes1[:, 0], boxes1[:, 1], boxes1[:, 2], boxes1[:, 3]
    xmin2, xmax2, ymin2, ymax2 = boxes2[:, 0], boxes2[:, 1], boxes2[:, 2], boxes2[:, 3]

    # Clamp at zero so that disjoint boxes contribute no overlap.
    overlap_w = np.maximum(0, np.minimum(xmax1, xmax2) - np.maximum(xmin1, xmin2))
    overlap_h = np.maximum(0, np.minimum(ymax1, ymax2) - np.maximum(ymin1, ymin2))
    intersection = overlap_w * overlap_h

    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
    union = area1 + area2 - intersection

    return intersection / union
def convert_coordinates(tensor, start_index, conversion='minmax2centroids'):
    """Convert box coordinates between the two supported formats.

    The supported formats are `(xmin, xmax, ymin, ymax)` ("minmax") and
    `(cx, cy, w, h)` ("centroids"). The four coordinates are expected to
    sit consecutively along the last axis of `tensor`, starting at
    `start_index`; every other entry is copied through unchanged.

    Arguments:
        tensor: Array-like of any rank whose last axis contains the four
            box coordinates at positions `start_index` .. `start_index + 3`.
        start_index: Index of the first coordinate along the last axis.
        conversion: Either `'minmax2centroids'` or `'centroids2minmax'`.

    Returns:
        A new float64 array with the same shape as `tensor` in which the
        four coordinates have been converted. The input is not modified.

    Raises:
        ValueError: If `conversion` is not one of the supported values.
    """
    ind = start_index
    # `np.float64` replaces the `np.float` alias, which no longer exists in
    # current NumPy releases. Reading from a float64 version of the input
    # also keeps unsigned-integer inputs from wrapping around when widths
    # and heights are computed by subtraction.
    src = np.asarray(tensor, dtype=np.float64)
    # Separate output buffer: the formulas below need the unconverted
    # values of all four coordinates, so they must not be overwritten.
    out = src.copy()
    if conversion == 'minmax2centroids':
        out[..., ind] = (src[..., ind] + src[..., ind + 1]) / 2.0      # cx
        out[..., ind + 1] = (src[..., ind + 2] + src[..., ind + 3]) / 2.0  # cy
        out[..., ind + 2] = src[..., ind + 1] - src[..., ind]          # w
        out[..., ind + 3] = src[..., ind + 3] - src[..., ind + 2]      # h
    elif conversion == 'centroids2minmax':
        out[..., ind] = src[..., ind] - src[..., ind + 2] / 2.0        # xmin
        out[..., ind + 1] = src[..., ind] + src[..., ind + 2] / 2.0    # xmax
        out[..., ind + 2] = src[..., ind + 1] - src[..., ind + 3] / 2.0  # ymin
        out[..., ind + 3] = src[..., ind + 1] + src[..., ind + 3] / 2.0  # ymax
    else:
        raise ValueError("Unexpected conversion value. Supported values are 'minmax2centroids' and 'centroids2minmax'.")
    return out
def convert_coordinates2(tensor, start_index, conversion='minmax2centroids'):
    """Matrix-multiplication variant of `convert_coordinates`.

    Converts the four box coordinates found along the last axis of
    `tensor`, starting at `start_index`, between `(xmin, xmax, ymin, ymax)`
    ("minmax") and `(cx, cy, w, h)` ("centroids") by right-multiplying
    them with a fixed 4x4 matrix. Most entries of these matrices are zero,
    so this form does more arithmetic than the element-wise version.

    Arguments:
        tensor: Array-like of any rank whose last axis contains the four
            box coordinates at positions `start_index` .. `start_index + 3`.
        start_index: Index of the first coordinate along the last axis.
        conversion: Either `'minmax2centroids'` or `'centroids2minmax'`.

    Returns:
        A new float64 array with the same shape as `tensor` in which the
        four coordinates have been converted. The input is not modified.

    Raises:
        ValueError: If `conversion` is not one of the supported values.
    """
    ind = start_index
    # `np.float64` replaces the `np.float` alias, which no longer exists in
    # current NumPy releases; `np.array` copies by default, so the caller's
    # data is left untouched.
    converted = np.array(tensor, dtype=np.float64)
    if conversion == 'minmax2centroids':
        # Columns produce cx, cy, w, h from the row (xmin, xmax, ymin, ymax).
        M = np.array([[0.5, 0. , -1.,  0.],
                      [0.5, 0. ,  1.,  0.],
                      [0. , 0.5,  0., -1.],
                      [0. , 0.5,  0.,  1.]])
    elif conversion == 'centroids2minmax':
        # Columns produce xmin, xmax, ymin, ymax from the row (cx, cy, w, h).
        M = np.array([[ 1. , 1. ,  0. , 0. ],
                      [ 0. , 0. ,  1. , 1. ],
                      [-0.5, 0.5,  0. , 0. ],
                      [ 0. , 0. , -0.5, 0.5]])
    else:
        raise ValueError("Unexpected conversion value. Supported values are 'minmax2centroids' and 'centroids2minmax'.")
    converted[..., ind:ind + 4] = np.dot(converted[..., ind:ind + 4], M)
    return converted
def greedy_nms(y_pred_decoded, iou_threshold=0.45, coords='minmax'):
'''
NMS(非极大值抑制):
对输入框向量进行非极大贪婪抑制,主要是消除多余的框,找到最佳匹配的位置
设定一个IOU的阈值,将输入框的A,B,C...与最后的F相比如果重叠度大于IOU的阈值则扔掉原本
设定A和C与F进行重叠比较,如果大于IOU设定阈值则排除AC保留F
顾名思义,保留最大的,抑制较小的
'''
'''
参数设定:
y_pred_decoded(list变量):就是一批输入框,N个输入框,每个框为一个二维numpy的数组,比如有k个框,那么此参数表示为[k,6],参数6代表[class_id, score, xmin, xmax, ymin, ymax]向量。
iou_threshold(float变量):所有的box都会进行相似度度量,如果相似度大于设定的IOU阈值,则从本地列表中删除该框,默认为0.45
coords(str变量):设定y_pred_decoded的输入格式
'''
y_pred_decoded_nms = [] #候选框
for batch_item in y_pred_decoded:
boxes_left = np.copy(batch_item) #先将当前的box进行复制
maxima = [] #存非极大抑制box的临时数组
while boxes_left.shape[0] > 0:#看box个数是否为0
maximum_index = np.argmax(boxes_left[:,1])
maximum_box = np.copy(boxes_left[maximum_index])
maxima.append(maximum_box) #存储极大框
boxes_left = np.delete(boxes_left, maximum_index, axis=0) #删除极大框
if boxes_left.shape[0] == 0: break