# 用于目标识别的数据需要以下8个字段
# 图像名称(完整路径),width,height,class,xmin,ymin,xmax,ymax
# coding: utf-8
import glob
import os
import xml.etree.ElementTree as ET

import pandas as pd
# Object labels to export; an annotation's <object> is kept only when its
# label equals one of these strings. Output rows are grouped in this order.
classes = [
    "player",
    "jiangshi",
]
def xml_to_csv(path, class_names=None, train_ratio=0.9):
    """Convert VOC-style XML annotations in ``path`` to train/eval CSV files.

    Every ``*.xml`` file directly inside ``path`` is parsed once. Each
    ``<object>`` element whose ``<name>`` is one of the requested classes
    becomes one row ``(filename, width, height, class, xmin, ymin, xmax,
    ymax)``, where ``filename`` is ``path`` joined with the annotation's
    ``<filename>`` text. Rows are split class by class: the first
    ``int(n * train_ratio)`` rows of a class go to ``train.csv`` and all
    remaining rows go to ``eval.csv``. Both files are written into ``path``.

    Args:
        path: Directory that contains the XML annotation files; also the
            directory the two CSV files are written to.
        class_names: Labels to export, in output order. Defaults to the
            module-level ``classes`` list.
        train_ratio: Fraction (0..1) of each class's rows that goes to the
            training set.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if class_names is None:
        class_names = classes
    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1')

    # One bucket per requested class, so each XML file is parsed only once
    # instead of once per class.
    rows_by_class = {name: [] for name in class_names}

    # glob order depends on the filesystem; sorting makes the split
    # reproducible from run to run.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            # Look elements up by tag name rather than by position so that
            # optional or reordered children cannot shift the values.
            label = member.findtext('name')
            if label not in rows_by_class:
                continue
            size = root.find('size')
            box = member.find('bndbox')
            rows_by_class[label].append((
                os.path.join(path, root.findtext('filename')),
                int(size.findtext('width')),
                int(size.findtext('height')),
                label,
                int(box.findtext('xmin')),
                int(box.findtext('ymin')),
                int(box.findtext('xmax')),
                int(box.findtext('ymax')),
            ))

    train_list = []
    eval_list = []
    # Dicts keep insertion order, so rows stay grouped in class_names order.
    for rows in rows_by_class.values():
        split = int(len(rows) * train_ratio)
        train_list.extend(rows[:split])
        # Start exactly at ``split``: beginning one later would silently
        # drop one sample of every class from both output files.
        eval_list.extend(rows[split:])

    column_name = ['filename', 'width', 'height', 'class',
                   'xmin', 'ymin', 'xmax', 'ymax']
    # Save both splits as CSV without the DataFrame index column.
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv(os.path.join(path, 'train.csv'), index=False)
    eval_df.to_csv(os.path.join(path, 'eval.csv'), index=False)
def main(path='C:/Users/53111/Desktop/voc2007/JPEGImages/'):
    """Convert the XML annotations under ``path`` to CSV and report success.

    Args:
        path: Folder that contains the XML annotation files. The default is
            a machine-specific location; pass the folder that holds your own
            XML files instead of editing this function.
    """
    xml_to_csv(path)
    print('Successfully converted xml to csv.')
# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger file I/O.
if __name__ == '__main__':
    main()