PASCAL VOC格式转YOLO格式，适配YOLOv3-v11_pascal voc转yolo-CSDN博客

本文链接：https://blog.csdn.net/jerson_al/article/details/144348531

近来发现平时写的小脚本总是找不到位置了，不如直接写成博客，方便自己查找，也方便他人参考。

此脚本实现：

1.PASCAL VOC格式的数据集转YOLO格式。

2.适配多种图片格式

3.支持重新读写图片，解决训练时经常出现的图片通道问题。（当然，也可以选择直接复制图片。）

4.支持自定义设置保存全量图片或只保存拥有标注文件的图片

一、具体代码：

# -*- coding: utf-8 -*-
"""
Created on 2024/12/9 上午9:56
@author: Jun Wei Xin
@contact: jerson_al@163.com
"""

import os
from xml.dom import minidom
import shutil
from PIL import Image



# Extensions accepted when the caller supplies neither ``image_formats`` nor
# the legacy module-level ``supported_image_formats`` global.
_DEFAULT_IMAGE_FORMATS = ('.jpg', '.jpeg', '.png', '.bmp')


def _xml_text(node, tag):
    """Return the stripped text of the first ``tag`` element found under ``node``.

    Raises:
        ValueError: If the element is missing or has no text content.
    """
    elements = node.getElementsByTagName(tag)
    if not elements or not elements[0].childNodes:
        raise ValueError(f"Missing or empty <{tag}> element")
    # Strip so that pretty-printed XML (" helmet ") still matches class_map keys.
    return elements[0].childNodes[0].data.strip()


def _voc_xml_to_yolo_lines(label_path, class_map):
    """Parse one VOC XML file and return its objects as YOLO label lines.

    Each line is ``"<class_id> <x_center> <y_center> <width> <height>\\n"`` with
    the four box values normalised by the image size declared in ``<size>``.

    Raises:
        KeyError: If an object's name is not a key of ``class_map``.
        ValueError: If a required element is missing/empty or the declared
            image size is not positive.
    """
    xmldoc = minidom.parse(label_path)
    sizes = xmldoc.getElementsByTagName('size')
    if not sizes:
        raise ValueError("Missing or empty <size> element")
    width = int(_xml_text(sizes[0], 'width'))
    height = int(_xml_text(sizes[0], 'height'))
    if width <= 0 or height <= 0:
        # Fail with a readable message instead of an opaque ZeroDivisionError.
        raise ValueError(f"Invalid image size {width}x{height} in {label_path}")

    yolo_lines = []
    for obj in xmldoc.getElementsByTagName('object'):
        label = _xml_text(obj, 'name')
        if label not in class_map:
            raise KeyError(f"Unknown label '{label}' in {class_map}")

        bndbox_nodes = obj.getElementsByTagName('bndbox')
        if not bndbox_nodes:
            raise ValueError("Missing or empty <bndbox> element")
        bndbox = bndbox_nodes[0]
        xmin = float(_xml_text(bndbox, 'xmin'))
        ymin = float(_xml_text(bndbox, 'ymin'))
        xmax = float(_xml_text(bndbox, 'xmax'))
        ymax = float(_xml_text(bndbox, 'ymax'))

        # Corner coordinates in pixels -> normalised centre/size.
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        bbox_w = (xmax - xmin) / width
        bbox_h = (ymax - ymin) / height
        yolo_lines.append(f"{class_map[label]} {x_center} {y_center} {bbox_w} {bbox_h}\n")
    return yolo_lines


def voc_to_yolo(src_label_root, src_img_root, dst_label_root, dst_img_root, class_map,
                is_whole_data=True, is_review_img=False, is_copy_img=False,
                image_formats=None):
    """
    Convert VOC format annotations to YOLO format with options for handling images.

    Every file in ``src_img_root`` whose lower-cased extension is an accepted
    image format is visited. If an XML file with the same stem exists in
    ``src_label_root`` it is converted and written to ``dst_label_root`` as
    ``<stem>.txt``. Per-file failures are collected and printed in the final
    summary instead of aborting the run.

    Args:
        src_label_root (str): Source directory containing VOC XML label files.
        src_img_root (str): Source directory containing images.
        dst_label_root (str): Destination directory for YOLO labels.
        dst_img_root (str): Destination directory for images.
        class_map (dict): Mapping of class names to YOLO class IDs.
        is_whole_data (bool): If True, images without a label file are kept
            (and copied); if False they are counted as skipped.
        is_review_img (bool): If True, each kept image is re-opened and
            re-saved with PIL. Takes priority over copying.
        is_copy_img (bool): If True, each kept image is copied unchanged.
            Copying also happens whenever ``is_whole_data`` is True.
        image_formats (iterable of str, optional): Accepted file extensions,
            lower-case with the leading dot. When omitted, a module-level
            ``supported_image_formats`` is used if one exists (legacy
            behaviour), otherwise ``('.jpg', '.jpeg', '.png', '.bmp')``.

    Returns:
        None. A summary and the list of per-file errors are printed.
    """
    if image_formats is None:
        # Older callers configured this through a module global that only
        # existed when the file was run as a script; honour it when present
        # but no longer crash with NameError when it is absent.
        image_formats = globals().get('supported_image_formats', _DEFAULT_IMAGE_FORMATS)

    processed_count = 0
    skipped_count = 0
    error_count = 0
    errors = []

    os.makedirs(dst_label_root, exist_ok=True)
    if is_review_img or is_copy_img or is_whole_data:
        os.makedirs(dst_img_root, exist_ok=True)

    # Stems of all available label files, for O(1) membership checks.
    label_files = {os.path.splitext(name)[0] for name in os.listdir(src_label_root)
                   if name.endswith('.xml')}

    # Sorted so that processing order and the error report are deterministic.
    for file in sorted(os.listdir(src_img_root)):
        img_filename, extension = os.path.splitext(file)
        if extension.lower() not in image_formats:
            continue

        img_path = os.path.join(src_img_root, file)
        label_path = os.path.join(src_label_root, img_filename + '.xml')
        yolo_label_path = os.path.join(dst_label_root, img_filename + '.txt')

        try:
            if img_filename in label_files:  # Image has a corresponding label
                yolo_labels = _voc_xml_to_yolo_lines(label_path, class_map)
                with open(yolo_label_path, 'w', encoding='utf-8') as f:
                    f.writelines(yolo_labels)
            elif not is_whole_data:
                skipped_count += 1
                continue  # Skip image without a label if not saving all data

            # Copy or rewrite the image according to the parameters
            dst_img_path = os.path.join(dst_img_root, file)
            if is_review_img:
                # Context manager releases the source file handle promptly.
                with Image.open(img_path) as img:
                    img.save(dst_img_path)
            elif is_copy_img or is_whole_data:
                shutil.copy(img_path, dst_img_path)

            processed_count += 1

        except Exception as e:
            # Deliberate per-file boundary: record the failure and keep going.
            error_count += 1
            errors.append((file, str(e)))

    # Summary of the process
    print(f"Processing complete: {processed_count} processed, {skipped_count} skipped, {error_count} errors.")
    for file, error_msg in errors:
        print(f"Error in {file}: {error_msg}")

二、运行代码：

if __name__ == '__main__':
    # Image extensions to process; voc_to_yolo reads this module-level name.
    supported_image_formats = ('.jpg', '.jpeg', '.png', '.bmp')

    # Class name -> YOLO class id.
    class_map = {"nohelmet": 0, "helmet": 1}

    # Input (VOC) and output (YOLO) locations.
    src_label, src_img = "./VOC/labels", "./VOC/images"
    dst_label, dst_img = "./VOC_OUTPUT/txt_labels", "./VOC_OUTPUT/re_images"

    # Example usage.
    # is_review_img takes priority over is_copy_img.
    image_options = {
        "is_whole_data": True,
        "is_review_img": True,
        "is_copy_img": False,
    }
    voc_to_yolo(src_label, src_img, dst_label, dst_img, class_map, **image_options)