近来发现平时写的小脚本总是记不清存放在哪里,不如直接写成博客,既方便自己查找,也方便他人参考。
此脚本实现:
1.PASCAL VOC格式的数据集转YOLO格式。
2.适配多种图片格式
3.支持重新读写图片,解决训练时经常出现的图片通道问题(当然,也可以选择直接copy图片)。
4.支持自定义设置保存全量图片或只保存拥有标注文件的图片
一、具体代码:
# -*- coding: utf-8 -*-
"""
Created on 2024/12/9 上午9:56
@author: Jun Wei Xin
@contact: jerson_al@163.com
"""
import os
from xml.dom import minidom
import shutil
from PIL import Image
def voc_to_yolo(src_label_root, src_img_root, dst_label_root, dst_img_root, class_map,
                is_whole_data=True, is_review_img=False, is_copy_img=False,
                image_formats=None):
    """
    Convert PASCAL VOC XML annotations to YOLO text labels and export the images.

    Every file in ``src_img_root`` whose extension is an accepted image format
    is visited. If ``<stem>.xml`` exists in ``src_label_root`` it is converted
    to ``<stem>.txt`` in ``dst_label_root``; each line has the form
    ``<class_id> <x_center> <y_center> <width> <height>`` with coordinates
    divided by the image size declared in the XML ``<size>`` element.

    A failure on one image (unknown class, malformed XML, unreadable image, ...)
    is recorded and reported at the end; it does not abort the whole run.

    Args:
        src_label_root (str): Source directory containing VOC XML label files.
        src_img_root (str): Source directory containing images.
        dst_label_root (str): Destination directory for YOLO labels (created if missing).
        dst_img_root (str): Destination directory for images (created when any
            of the three image flags is set).
        class_map (dict): Mapping of class names to YOLO class IDs.
        is_whole_data (bool): If True, images without an XML label are exported
            too; if False, they are counted as skipped.
        is_review_img (bool): If True, re-encode each image with PIL instead of
            copying it. Takes priority over ``is_copy_img``.
        is_copy_img (bool): If True, copy each image to ``dst_img_root``.
        image_formats (iterable of str, optional): Accepted file extensions,
            including the leading dot; compared case-insensitively. When
            omitted, a module-level ``supported_image_formats`` is used if one
            exists, otherwise ``('.jpg', '.jpeg', '.png', '.bmp')``.

    Returns:
        None. A summary and the per-file errors are printed to stdout.
    """
    if image_formats is None:
        # Earlier versions read a global that only existed when the file was run
        # as a script, so importing the module and calling this function raised
        # NameError. Honour that global when present, else use a safe default.
        image_formats = globals().get('supported_image_formats',
                                      ('.jpg', '.jpeg', '.png', '.bmp'))
    image_formats = {ext.lower() for ext in image_formats}

    processed_count = 0
    skipped_count = 0
    error_count = 0
    errors = []

    os.makedirs(dst_label_root, exist_ok=True)
    if is_review_img or is_copy_img or is_whole_data:
        os.makedirs(dst_img_root, exist_ok=True)

    # Stems (file names without extension) of all available XML label files
    label_files = {os.path.splitext(name)[0]
                   for name in os.listdir(src_label_root) if name.endswith('.xml')}

    # Sorted so that processing and the error report are deterministic
    for file in sorted(os.listdir(src_img_root)):
        img_filename, ext = os.path.splitext(file)
        if ext.lower() not in image_formats:
            continue

        img_path = os.path.join(src_img_root, file)
        label_path = os.path.join(src_label_root, img_filename + '.xml')
        yolo_label_path = os.path.join(dst_label_root, img_filename + '.txt')

        # Deliberately broad: this is the per-file boundary of a batch job, and
        # one bad file must not stop the remaining conversions.
        try:
            if img_filename in label_files:
                yolo_labels = _voc_xml_to_yolo_lines(label_path, class_map)
                with open(yolo_label_path, 'w', encoding='utf-8') as f:
                    f.writelines(yolo_labels)
            elif not is_whole_data:
                skipped_count += 1
                continue  # Unlabelled image and only labelled data was requested

            dst_img_path = os.path.join(dst_img_root, file)
            if is_review_img:
                # Context manager closes the underlying file handle after saving
                with Image.open(img_path) as img:
                    img.save(dst_img_path)
            elif is_copy_img or is_whole_data:
                shutil.copy(img_path, dst_img_path)
            processed_count += 1
        except Exception as e:
            error_count += 1
            errors.append((file, str(e)))

    # Summary of the process
    print(f"Processing complete: {processed_count} processed, {skipped_count} skipped, {error_count} errors.")
    for file, error_msg in errors:
        print(f"Error in {file}: {error_msg}")


def _first_text(node, tag):
    """Return the text of the first ``tag`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].childNodes[0].data


def _voc_xml_to_yolo_lines(label_path, class_map):
    """Parse one VOC XML file and return its objects as YOLO label lines."""
    xmldoc = minidom.parse(label_path)
    size = xmldoc.getElementsByTagName('size')[0]
    width = int(_first_text(size, 'width'))
    height = int(_first_text(size, 'height'))
    if width <= 0 or height <= 0:
        # Without this check the normalisation below fails with an opaque
        # "division by zero" message.
        raise ValueError(f"Invalid image size {width}x{height} in {label_path}")

    yolo_labels = []
    for obj in xmldoc.getElementsByTagName('object'):
        label = _first_text(obj, 'name')
        if label not in class_map:
            raise KeyError(f"Unknown label '{label}' in {class_map}")
        bndbox = obj.getElementsByTagName('bndbox')[0]
        xmin = float(_first_text(bndbox, 'xmin'))
        ymin = float(_first_text(bndbox, 'ymin'))
        xmax = float(_first_text(bndbox, 'xmax'))
        ymax = float(_first_text(bndbox, 'ymax'))
        # Corner coordinates -> centre/size, divided by the declared image size
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        bbox_w = (xmax - xmin) / width
        bbox_h = (ymax - ymin) / height
        yolo_labels.append(f"{class_map[label]} {x_center} {y_center} {bbox_w} {bbox_h}\n")
    return yolo_labels
二、运行代码:
if __name__ == '__main__':
    # Image extensions accepted by voc_to_yolo; the function looks this name up
    # at module level, so it has to be defined before the call below.
    supported_image_formats = ('.jpg', '.jpeg', '.png', '.bmp')

    # VOC class name -> YOLO class id
    class_map = {"nohelmet": 0, "helmet": 1}

    # Example usage.
    # Note: is_review_img takes priority over is_copy_img.
    conversion_args = {
        "src_label_root": "./VOC/labels",
        "src_img_root": "./VOC/images",
        "dst_label_root": "./VOC_OUTPUT/txt_labels",
        "dst_img_root": "./VOC_OUTPUT/re_images",
        "class_map": class_map,
        "is_whole_data": True,
        "is_review_img": True,
        "is_copy_img": False,
    }
    voc_to_yolo(**conversion_args)