按 8:2 切分数据集:将数据集分成 0.8 的训练集和 0.2 的验证集(代码中的 val 目录)
import os
import random
import shutil
# Source and destination directories for the 8:2 train/val split.
annotations_dir = '/home/dataset/VOC2007/Annotations'
images_dir = '/home/dataset/VOC2007/JPEGImages'
train_annotations_dir = '/home/dataset/VOC2007_new/train/Annotations'
train_images_dir = '/home/dataset/VOC2007_new/train/JPEGImages'
val_annotations_dir = '/home/dataset/VOC2007_new/val/Annotations'
val_images_dir = '/home/dataset/VOC2007_new/val/JPEGImages'


def _find_pairs(src_annotations, src_images):
    """Return (xml_name, jpg_name) pairs that exist in both directories.

    Names are returned exactly as they appear on disk, so they stay valid
    on case-sensitive filesystems. Only the extension is compared
    case-insensitively; the stem must match exactly. The result is sorted
    by annotation name so a seeded shuffle is reproducible.
    """
    images_by_stem = {}
    for name in os.listdir(src_images):
        stem, ext = os.path.splitext(name)
        if ext.lower() == '.jpg':
            images_by_stem[stem] = name

    pairs = []
    for name in sorted(os.listdir(src_annotations)):
        stem, ext = os.path.splitext(name)
        if ext.lower() == '.xml' and stem in images_by_stem:
            pairs.append((name, images_by_stem[stem]))
    return pairs


def split_dataset(src_annotations, src_images,
                  train_annotations, train_images,
                  val_annotations, val_images,
                  train_ratio=0.8, seed=None):
    """Move annotation/image pairs into train and val directories.

    Only annotations that have a matching image are considered, and the
    ratio is applied to those pairs, so unpaired or unrelated files do not
    skew the split. Unpaired files are left where they are.

    Args:
        src_annotations: Directory containing the ``.xml`` annotations.
        src_images: Directory containing the ``.jpg`` images.
        train_annotations: Destination for training annotations.
        train_images: Destination for training images.
        val_annotations: Destination for validation annotations.
        val_images: Destination for validation images.
        train_ratio: Fraction of pairs moved to the training set.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        A ``(num_train, num_val)`` tuple with the number of pairs moved.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    for directory in (train_annotations, train_images,
                      val_annotations, val_images):
        os.makedirs(directory, exist_ok=True)

    pairs = _find_pairs(src_annotations, src_images)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(pairs)

    num_train = int(train_ratio * len(pairs))
    subsets = (
        (pairs[:num_train], train_annotations, train_images),
        (pairs[num_train:], val_annotations, val_images),
    )
    for subset, dst_annotations, dst_images in subsets:
        for xml_name, jpg_name in subset:
            shutil.move(os.path.join(src_annotations, xml_name),
                        os.path.join(dst_annotations, xml_name))
            shutil.move(os.path.join(src_images, jpg_name),
                        os.path.join(dst_images, jpg_name))
    return num_train, len(pairs) - num_train


if __name__ == "__main__":
    split_dataset(annotations_dir, images_dir,
                  train_annotations_dir, train_images_dir,
                  val_annotations_dir, val_images_dir)
    print("数据集分割完成!")
VOC转COCO格式数据集样例代码
import os
import json
import xml.etree.ElementTree as ET
import glob
# Predefined categories for the VOC -> COCO conversion; left as None here.
# NOTE(review): the code that consumes this is outside the visible chunk —
# presumably None means categories are collected from the XML files; confirm.
PRE_DEFINE_CATEGORIES = None
# NOTE(review): presumably the id given to the first bounding-box annotation;
# its use is not visible here. Confirm that starting at 2 (rather than 1) is
# intentional.
START_BOUNDING_BOX_ID = 2