1. COCO格式 vs VOC格式看这里
COCO2014数据集:
VOC2007数据集:
txt文件(YOLO格式,每行:类别编号 + 中心点坐标 + 宽高,坐标均为相对于图片宽高的归一化值):
45 0.479492 0.688771 0.955609 0.595500
45 0.736516 0.247188 0.498875 0.476417
50 0.637063 0.732938 0.494125 0.510583
45 0.339438 0.418896 0.678875 0.781500
49 0.646836 0.132552 0.118047 0.096937
49 0.773148 0.129802 0.090734 0.097229
49 0.668297 0.226906 0.131281 0.146896
49 0.642859 0.079219 0.148063 0.148062
xml文件(VOC格式:左上角+右下角坐标,单位为像素的真实值):
<annotation>
<folder>VOC2007</folder>
<filename>000001.jpg</filename>
<source>
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
<flickrid>341012865</flickrid>
</source>
<owner>
<flickrid>Fried Camels</flickrid>
<name>Jinky the Fruit Bat</name>
</owner>
<size>
<width>353</width>
<height>500</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>dog</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>48</xmin>
<ymin>240</ymin>
<xmax>195</xmax>
<ymax>371</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>12</ymin>
<xmax>352</xmax>
<ymax>498</ymax>
</bndbox>
</object>
</annotation>
2. txt文件转为xml文件
import glob
import os

import cv2
# Header of a PASCAL VOC annotation file.
# Placeholders, in order: image file name, image width, image height, depth.
# (A stray '.' after </filename> used to leak into every generated file.)
xml_head = '''<annotation>
<folder>VOC2007</folder>
<filename>{}</filename>
<source>
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
</source>
<size>
<width>{}</width>
<height>{}</height>
<depth>{}</depth>
</size>
<segmented>0</segmented>
'''
# One <object> entry.
# Placeholders, in order: class name, xmin, ymin, xmax, ymax.
xml_obj = '''
<object>
<name>{}</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>{}</xmin>
<ymin>{}</ymin>
<xmax>{}</xmax>
<ymax>{}</ymax>
</bndbox>
</object>
'''
# Closing tag appended after the last <object> entry.
xml_end = '''
</annotation>'''
# Expected directory layout under `root`:
#--data
#----train      training images (.jpg)
#----train_txt  matching YOLO txt labels (same file stem as the image)
#----train_xml  output directory for the generated VOC xml labels
root = 'E:/pycharm_codes/data/'

# Class names indexed by YOLO class id. Fill this in with the dataset's class
# names, in the same order that was used when the txt labels were produced.
# Ids that have no entry here are written to the xml as their numeric id.
labels = []


def class_name_for(class_id, class_names):
    """Return the name for `class_id`, or the id as text when it has no name."""
    if 0 <= class_id < len(class_names):
        return class_names[class_id]
    return str(class_id)


def yolo_line_to_voc_box(line, img_w, img_h):
    """Convert one YOLO label line into a VOC-style pixel box.

    Args:
        line: text of the form "class cx cy w h", where cx/cy/w/h are
            fractions of the image width/height. Fields may be separated by
            any whitespace.
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        (class_id, xmin, ymin, xmax, ymax) with integer pixel coordinates
        clamped to the image, or None when the line is blank.

    Raises:
        ValueError: if the line has fewer than five fields or a field is not
            numeric.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 5:
        raise ValueError('expected "class cx cy w h", got: %r' % line)
    class_id = int(float(fields[0]))
    center_x = float(fields[1]) * img_w
    center_y = float(fields[2]) * img_h
    half_w = float(fields[3]) * img_w / 2
    half_h = float(fields[4]) * img_h / 2

    # Work in floats and round once per corner: rounding the centre and the
    # size first and truncating afterwards compounds the error and skews the
    # box. Clamp because rounding (or sloppy labels) can land just outside.
    def to_pixel(value, upper):
        return min(max(int(round(value)), 0), upper)

    return (
        class_id,
        to_pixel(center_x - half_w, img_w),
        to_pixel(center_y - half_h, img_h),
        to_pixel(center_x + half_w, img_w),
        to_pixel(center_y + half_h, img_h),
    )


def convert_image_labels(jpg_path):
    """Write the VOC xml for one training image from its YOLO txt labels.

    Args:
        jpg_path: path of an image inside `root`/train.

    Returns:
        True when the xml file was written, False when the image was skipped
        because it could not be read or has no label file.
    """
    # basename/splitext handle both '/' and '\\' separators on Windows and
    # keep file names that contain extra dots intact.
    image_name = os.path.basename(jpg_path)
    stem = os.path.splitext(image_name)[0]
    txt = root + 'train_txt/' + stem + '.txt'
    jpg = root + 'train/' + image_name
    xml = root + 'train_xml/' + stem + '.xml'
    print(txt)
    print(jpg)
    print(xml)

    img = cv2.imread(jpg)
    if img is None:
        # cv2.imread reports an unreadable file by returning None.
        print('skip (cannot read image):', jpg)
        return False
    img_h, img_w = img.shape[0], img.shape[1]
    print('h_factor:', img_h, ' w_factor:', img_w)

    try:
        with open(txt, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        print('skip (no label file):', txt)
        return False

    # VOC stores the image file name including its extension. Depth stays
    # "3", as in the original script.
    parts = [xml_head.format(image_name, str(img_w), str(img_h), "3")]
    for line in lines:
        box = yolo_line_to_voc_box(line, img_w, img_h)
        if box is None:
            continue  # blank line
        class_id, xmin, ymin, xmax, ymax = box
        parts.append(xml_obj.format(
            class_name_for(class_id, labels),
            str(xmin), str(ymin), str(xmax), str(ymax)))
    parts.append(xml_end)

    # The xml carries no encoding declaration, so it must be UTF-8.
    with open(xml, 'w', encoding='utf-8') as f_xml:
        f_xml.write(''.join(parts))
    return True


jpg_paths = glob.glob(root + 'train' + '/*.jpg')
print(len(jpg_paths))
if jpg_paths and not labels:
    print('warning: `labels` is empty; numeric class ids will be used as names')
cnt = 0
for jpg_path in jpg_paths:
    if convert_image_labels(jpg_path):
        cnt += 1
print(cnt)