首先准备好自己的数据集,包括图像文件夹imgs和对应的txt标签文件
运行以下代码,设置train、val、test之间的比例,一般为0.8:0.1:0.1,本人一般设置为0.8:0.2:0
在defect处可设置自己的数据集名称
设置自己数据集的图像路径和txt文件路径
import os
import random
import shutil
# Name of the dataset folder to generate. Combining a name, the image count
# and a date (as here) makes datasets easy to tell apart later.
defect = "wrapSegmentation455_20240528"

# Train / val / test split ratios; 8:1:1 is a common choice.
# The splitting code further down only reads train_ratio and val_ratio;
# the test split receives whatever is left over.
train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1

# Source folders that hold the images and their txt label files.
image_original_path = r"D:\1. 缠绕项目\4.数据集\wrap20240528\imgs"
label_original_path = r"D:\1. 缠绕项目\4.数据集\wrap20240528\labels_txt"
# Everything is generated below <current working directory>/datasets/<defect>/.
cur_path = os.getcwd()


def _dataset_path(relative):
    """Return `relative` joined under <cur_path>/datasets/<defect>/."""
    return os.path.join(cur_path, f"datasets/{defect}/{relative}")


# Training split: image and label folders
train_image_path = _dataset_path("images/train/")
train_label_path = _dataset_path("labels/train/")
# Validation split: image and label folders
val_image_path = _dataset_path("images/val/")
val_label_path = _dataset_path("labels/val/")
# Test split: image and label folders
test_image_path = _dataset_path("images/test/")
test_label_path = _dataset_path("labels/test/")
# Per-split list files (train.txt / val.txt / test.txt)
list_train = _dataset_path("train.txt")
list_val = _dataset_path("val.txt")
list_test = _dataset_path("test.txt")
# Create the destination folders for the train, val and test splits.
# exist_ok=True lets the script be re-run when the folders already exist.
for split_dir in (
    train_image_path,
    train_label_path,
    val_image_path,
    val_label_path,
    test_image_path,
    test_label_path,
):
    os.makedirs(split_dir, exist_ok=True)
# Collect the file names from both source folders. os.listdir() returns names
# in arbitrary order, so the two listings must never be matched by position.
image_names = sorted(os.listdir(image_original_path))
label_names = set(os.listdir(label_original_path))

# Pair each image with the label file that shares its base name (<name>.txt).
# Files in the image folder that have no such label are skipped instead of
# being paired with some other image's label.
pairs = []
unlabeled = []
for image_name in image_names:
    label_name = os.path.splitext(image_name)[0] + '.txt'
    if label_name in label_names:
        pairs.append((image_name, label_name))
    else:
        unlabeled.append(image_name)
if unlabeled:
    print(f"Skipped {len(unlabeled)} file(s) without a matching .txt label: {unlabeled}")

# Shuffle so that the split is random.
random.shuffle(pairs)

# Split boundaries. test_ratio is not needed here: everything that is left
# after the train and val shares (rounding leftovers included) goes to test.
num_pairs = len(pairs)
num_train = int(num_pairs * train_ratio)
num_val = int(num_pairs * val_ratio)

# Copy every pair into the folders of its split and record the copied image's
# path in that split's list file. The `with` block closes all three list
# files even if a copy fails part-way through.
with open(list_train, 'w') as file_train, \
        open(list_val, 'w') as file_val, \
        open(list_test, 'w') as file_test:
    for i, (image_name, label_name) in enumerate(pairs):
        # Pick the destination of this pair from its position after shuffling.
        if i < num_train:
            dst_image_dir, dst_label_dir, list_file = train_image_path, train_label_path, file_train
        elif i < num_train + num_val:
            dst_image_dir, dst_label_dir, list_file = val_image_path, val_label_path, file_val
        else:
            dst_image_dir, dst_label_dir, list_file = test_image_path, test_label_path, file_test

        src_image = os.path.join(image_original_path, image_name)
        src_label = os.path.join(label_original_path, label_name)
        dst_image = os.path.join(dst_image_dir, image_name)
        dst_label = os.path.join(dst_label_dir, label_name)

        shutil.copyfile(src_image, dst_image)
        shutil.copyfile(src_label, dst_label)
        list_file.write(dst_image + '\n')
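运行完成后,会在当前工作目录下生成 datasets/<数据集名称>/ 文件夹,其中包含 images/train、images/val、images/test、labels/train、labels/val、labels/test 六个子文件夹,以及 train.txt、val.txt、test.txt 三个图片路径列表文件,但不包含yaml文件!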
通过本文生成的数据集,yaml文件可以如下编写,只需修改nc,names,编写成功后将yaml文件存放在如上路径
此时数据集已经准备完成,在训练代码中data选择上述yaml文件的路径,就可以进行自己的YOLO数据集训练了