k-means需要有数据,中心点个数是需要人为指定的,位置可以随机初始化,但是还需要度量到聚类中心的距离。这里怎么度量这个距离是很关键的。
距离度量如果使用标准的欧氏距离,大盒子会比小盒子产生更多的错误。例。因此这里使用其他的距离度量公式。聚类的目的是anchor boxes和临近的ground truth有更大的IOU值,这和anchor box的尺寸没有直接关系。自定义的距离度量公式:
到聚类中心的距离越小越好,但IOU值是越大越好,所以使用 1 - IOU,这样就保证距离越小,IOU值越大。
使用的聚类原始数据是只有标注框的检测数据集,YOLOv2、v3都会生成一个包含标注框位置和类别的TXT文件,其中每行都包含
,即ground truth boxes相对于原图的坐标,
是框的中心点,
是框的宽和高,N是所有标注框的个数;
首先给定k个聚类中心点
,这里的
是anchor boxes的宽和高尺寸,由于anchor boxes位置不固定,所以没有(x,y)的坐标,只有宽和高;
计算每个标注框和每个聚类中心点的距离 d=1-IOU(标注框,聚类中心),计算时每个标注框的中心点都与聚类中心重合,这样才能计算IOU值,即
。将标注框分配给“距离”最近的聚类中心;
所有标注框分配完毕以后,对每个簇重新计算聚类中心点,计算方式为
,
是第i个簇的标注框个数,就是求该簇中所有标注框的宽和高的平均值。
重复第3、4步,直到聚类中心改变量很小。
代码实现主要是AlexeyAB/darknet中scripts/gen_anchors.py,这里根据yolov2,yolov3的版本不同进行部分修改。yolov2的配置文件yolov2.cfg需要的anchors是相对特征图的,值很小基本都小于13;yolov3的配置文件yolov3.cfg需要的3个anchors是相对于原图来说的,相对都比较大。还有输入图片的大小(32的倍数)对于输出也是有影响的。
例:
yolov2.cfg中[region] anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
yolov3.cfg中[region] anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
-
from os import listdir
-
from os.path import isfile, join
-
import argparse
-
#import cv2
-
import numpy as np
-
import sys
-
import os
-
import shutil
-
import random
-
import math
-
def IOU(x,centroids):
-
'''
-
:param x: 某一个ground truth的w,h
-
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
-
:return: 单个ground truth box与所有k个anchor box的IoU值集合
-
'''
-
IoUs = []
-
w, h = x # ground truth的w,h
-
for centroid in centroids:
-
c_w,c_h = centroid #anchor的w,h
-
if c_w>=w and c_h>=h: #anchor包围ground truth
-
iou = w*h/(c_w*c_h)
-
elif c_w>=w and c_h<=h: #anchor宽矮
-
iou = w*c_h/(w*h + (c_w-w)*c_h)
-
elif c_w<=w and c_h>=h: #anchor瘦长
-
iou = c_w*h/(w*h + c_w*(c_h-h))
-
else: #ground truth包围anchor means both w,h are bigger than c_w and c_h respectively
-
iou = (c_w*c_h)/(w*h)
-
IoUs.append(iou) # will become (k,) shape
-
return np.array(IoUs)
-
def avg_IOU(X,centroids):
-
'''
-
:param X: ground truth的w,h的集合[(w,h),(),...]
-
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
-
'''
-
n,d = X.shape
-
sum = 0.
-
for i in range(X.shape[0]):
-
sum+= max(IOU(X[i],centroids)) #返回一个ground truth与所有anchor的IoU中的最大值
-
return sum/n #对所有ground truth求平均
-
def write_anchors_to_file(centroids,X,anchor_file,input_shape,yolo_version):
-
'''
-
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
-
:param X: ground truth的w,h的集合[(w,h),(),...]
-
:param anchor_file: anchor和平均IoU的输出路径
-
'''
-
f = open(anchor_file,'w')
-
anchors = centroids.copy()
-
print(anchors.shape)
-
if yolo_version=='yolov2':
-
for i in range(anchors.shape[0]):
-
#yolo中对图片的缩放倍数为32倍,所以这里除以32,
-
# 如果网络架构有改变,根据实际的缩放倍数来
-
#求出anchor相对于缩放32倍以后的特征图的实际大小(yolov2)
-
anchors[i][0]*=input_shape/32.
-
anchors[i][1]*=input_shape/32.
-
elif yolo_version=='yolov3':
-
for i in range(anchors.shape[0]):
-
#求出yolov3相对于原图的实际大小
-
anchors[i][0]*=input_shape
-
anchors[i][1]*=input_shape
-
else:
-
print("the yolo version is not right!")
-
exit(-1)
-
widths = anchors[:,0]
-
sorted_indices = np.argsort(widths)
-
print('Anchors = ', anchors[sorted_indices])
-
for i in sorted_indices[:-1]:
-
f.write('%0.2f,%0.2f, '%(anchors[i,0],anchors[i,1]))
-
#there should not be comma after last anchor, that's why
-
f.write('%0.2f,%0.2f\n'%(anchors[sorted_indices[-1:],0],anchors[sorted_indices[-1:],1]))
-
f.write('%f\n'%(avg_IOU(X,centroids)))
-
print()
-
def kmeans(X,centroids,eps,anchor_file,input_shape,yolo_version):
-
N = X.shape[0] #ground truth的个数
-
iterations = 0
-
print("centroids.shape",centroids)
-
k,dim = centroids.shape #anchor的个数k以及w,h两维,dim默认等于2
-
prev_assignments = np.ones(N)*(-1) #对每个ground truth分配初始标签
-
iter = 0
-
old_D = np.zeros((N,k)) #初始化每个ground truth对每个anchor的IoU
-
while True:
-
D = []
-
iter+=1
-
for i in range(N):
-
d = 1 - IOU(X[i],centroids)
-
D.append(d)
-
D = np.array(D) # D.shape = (N,k) 得到每个ground truth对每个anchor的IoU
-
print("iter {}: dists = {}".format(iter,np.sum(np.abs(old_D-D)))) #计算每次迭代和前一次IoU的变化值
-
#assign samples to centroids
-
assignments = np.argmin(D,axis=1) #将每个ground truth分配给距离d最小的anchor序号
-
if (assignments == prev_assignments).all() : #如果前一次分配的结果和这次的结果相同,就输出anchor以及平均IoU
-
print("Centroids = ",centroids)
-
write_anchors_to_file(centroids,X,anchor_file,input_shape,yolo_version)
-
return
-
#calculate new centroids
-
centroid_sums=np.zeros((k,dim),np.float) #初始化以便对每个簇的w,h求和
-
for i in range(N):
-
centroid_sums[assignments[i]]+=X[i] #将每个簇中的ground truth的w和h分别累加
-
for j in range(k): #对簇中的w,h求平均
-
centroids[j] = centroid_sums[j]/(np.sum(assignments==j)+1)
-
prev_assignments = assignments.copy()
-
old_D = D.copy()
-
def main(argv):
-
parser = argparse.ArgumentParser()
-
parser.add_argument('-filelist', default = r'E:\BaiduNetdiskDownload\darknetHG8245\scripts\train.txt',
-
help='path to filelist\n' )
-
parser.add_argument('-output_dir', default = r'E:\BaiduNetdiskDownload\darknetHG8245', type = str,
-
help='Output anchor directory\n' )
-
parser.add_argument('-num_clusters', default = 0, type = int,
-
help='number of clusters\n' )
-
'''
-
需要注意的是yolov2输出的值比较小是相对特征图来说的,
-
yolov3输出值较大是相对原图来说的,
-
所以yolov2和yolov3的输出是有区别的
-
'''
-
parser.add_argument('-yolo_version', default='yolov2', type=str,
-
help='yolov2 or yolov3\n')
-
parser.add_argument('-yolo_input_shape', default=416, type=int,
-
help='input images shape,multiples of 32. etc. 416*416\n')
-
args = parser.parse_args()
-
if not os.path.exists(args.output_dir):
-
os.mkdir(args.output_dir)
-
f = open(args.filelist)
-
lines = [line.rstrip('\n') for line in f.readlines()]
-
annotation_dims = []
-
for line in lines:
-
line = line.replace('JPEGImages','labels')
-
line = line.replace('.jpg','.txt')
-
line = line.replace('.png','.txt')
-
print(line)
-
f2 = open(line)
-
for line in f2.readlines():
-
line = line.rstrip('\n')
-
w,h = line.split(' ')[3:]
-
#print(w,h)
-
annotation_dims.append((float(w),float(h)))
-
annotation_dims = np.array(annotation_dims) #保存所有ground truth框的(w,h)
-
eps = 0.005
-
if args.num_clusters == 0:
-
for num_clusters in range(1,11): #we make 1 through 10 clusters
-
anchor_file = join( args.output_dir,'anchors%d.txt'%(num_clusters))
-
indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)]
-
centroids = annotation_dims[indices]
-
kmeans(annotation_dims,centroids,eps,anchor_file,args.yolo_input_shape,args.yolo_version)
-
print('centroids.shape', centroids.shape)
-
else:
-
anchor_file = join( args.output_dir,'anchors%d.txt'%(args.num_clusters))
-
indices = [ random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)]
-
centroids = annotation_dims[indices]
-
kmeans(annotation_dims,centroids,eps,anchor_file,args.yolo_input_shape,args.yolo_version)
-
print('centroids.shape', centroids.shape)
-
if __name__=="__main__":
-
main(sys.argv)
这是其中的yolov3的结果