# Split a VOC-style annotation directory into trainval / train / val / test
# lists of sample names (file names without extension), one name per line.
parser = argparse.ArgumentParser()
# Directory containing the XML annotation files (usually "Annotations");
# adjust to your own dataset.
parser.add_argument('--xml_path', default='VOCdevkit/VOC2007/Annotations', type=str, help='input xml label path')
# Directory that receives the split lists; normally ImageSets/Main of the dataset.
parser.add_argument('--txt_path', default='VOCdevkit/VOC2007/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

# Fraction of ALL samples assigned to train+val; the remaining 0.2 is the test set.
trainval_percent = 0.8
# Fraction of the train+val pool (not of the whole dataset) assigned to train.
train_percent = 0.8

xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path

# Only annotation files take part in the split; stray entries (hidden files,
# sub-directories, other formats) would otherwise end up as bogus sample names.
total_xml = [entry for entry in os.listdir(xmlfilepath) if entry.lower().endswith('.xml')]
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

# Sets give O(1) membership tests inside the loop below.
trainval_ids = set(trainval)
train_ids = set(train)

# The context manager closes all four files even if a write fails.
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as file_trainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as file_test, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as file_train, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as file_val:
    for i in list_index:
        # Drop the extension robustly instead of slicing a fixed 4 characters.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_ids:
            file_trainval.write(name)
            if i in train_ids:
                file_train.write(name)
            else:
                file_val.write(name)
        else:
            file_test.write(name)
# 2022/09/11添加新版本代码如下:
# -*- coding: utf-8 -*-
"""
Author:smile
Date:2022/09/11 10:00
顺序:脚本A1
简介:分训练集、验证集和测试集,按照 8:1:1 的比例来分,训练集8,验证集1,测试集1
"""
import os
import random
import argparse