导入依赖包:
import os
from lxml import etree
设置原始标签(xml)与新标签(txt)的存放文件夹路径变量,并读取原始标签文件夹下的全部文件名列表 files:
# Folder holding the original label files. The trailing slash matters to any
# code that builds file paths from it by plain string concatenation.
source_path = 'D:/source/label/path/'
# Folder that receives the newly generated txt label files.
label_path = 'D:/new/label/path'
# Create the txt label folder. makedirs also creates missing parent folders,
# and exist_ok=True makes this a no-op when the folder already exists
# (no separate existence check, so no check-then-create race).
os.makedirs(label_path, exist_ok=True)
# List the names of all entries in the source label folder.
files = os.listdir(source_path)
读取全部标签文件中的类别名称,并生成类别与数字的对应关系字典:
# Collect the category names used across all label files.
def get_classes(files, source_path):
    """Return the sorted list of distinct object names found in the label files.

    Args:
        files: File names of the label files located inside ``source_path``.
        source_path: Folder that contains the label files.

    Returns:
        A list with the distinct ``<name>`` texts found directly under
        ``<object>`` elements, sorted so that the result (and any
        name-to-index mapping built from it) is the same on every run.
    """
    class_set = set()
    for file in files:
        # os.path.join works whether or not source_path ends with a separator.
        with open(os.path.join(source_path, file), 'rb') as fb:
            # Parse the file content with lxml's HTML parser.
            xml = etree.HTML(fb.read())
        for label in xml.xpath('//object'):
            names = label.xpath('./name/text()')
            # Skip objects that carry no name text instead of raising
            # IndexError on the empty xpath result.
            if names:
                class_set.add(names[0])
    # Iteration order of a set of strings can differ between interpreter
    # runs; sorting keeps the class order reproducible.
    return sorted(class_set)
# Gather every category name that occurs in the label files.
classes = get_classes(files, source_path)
# Map each category name to its position in the list,
# for example {'apple': 0, 'banana': 1}.
class_dict = {name: index for index, name in enumerate(classes)}
将 xml 标签文件转换为 txt 标签文件: