从 VOC 到 YOLO：转换目标检测数据集（附完整代码）

原创已于 2025-11-14 19:26:46 修改 · 705 阅读

12 ·

CC 4.0 BY-SA版权

文章标签：

#YOLO #目标检测 #人工智能 #python #计算机视觉

于 2025-11-14 19:16:41 首次发布

算法专栏收录该内容

5 篇文章

订阅专栏

部署运行你感兴趣的模型镜像

VOC 到 YOLO

一、VOC 和 YOLO 是什么？有什么区别？
- 1. PASCAL VOC 格式（经典 XML）
- 2. YOLO 格式（简洁 TXT）
二、为什么要写这个转换脚本？
三、VOC 转 YOLO 代码

一、VOC 和 YOLO 是什么？有什么区别？

VOC 是“通用语言”，适合做学术 benchmark，兼容 Faster R-CNN 等模型
YOLO 是“实战派”，只为YOLO 系列模型服务，但效率极高。

1. PASCAL VOC 格式（经典 XML）

每张图片对应一个 .xml 文件。
标注信息包括：图像宽高、每个物体的类别名、边界框坐标（xmin, ymin, xmax, ymax）。

示例：

<annotation>
  <object>
    <name>with_mask</name>
    <bndbox>
      <xmin>100</xmin>
      <ymin>80</ymin>
      <xmax>200</xmax>
      <ymax>180</ymax>
    </bndbox>
  </object>
</annotation>

2. YOLO 格式（简洁 TXT）

每张图片对应一个 .txt 文件。
每行表示一个物体，所有坐标都是归一化值（0~1），相对于图像宽高

格式为：

class_id center_x center_y width height

0 0.45 0.5 0.2 0.3

二、为什么要写这个转换脚本？

很多公开数据集（如口罩检测、交通标志）是以 VOC 格式发布的。

但 YOLO 官方代码只支持 YOLO 格式。

因此，我们需要一个工具：自动把 VOC 转成 YOLO，并划分训练/验证/测试集。

这就是本文脚本要做的事！

三、VOC 转 YOLO 代码

1. 整体目标：

将一个 PASCAL VOC 格式的数据集（Annotations/ 目录下的 .xml 标注文件，以及 JPEGImages/ 目录下的 .png 或 .jpg 图片）转换为 YOLO 格式的数据集；
并自动划分为 train/val/test 三个子集；
最终生成可用于 YOLOv5/v8 训练的结构和配置文件

在这里插入图片描述

2. 完整代码

在你的项目里创建一个 YoloReverse.py 文件

在这里插入图片描述

写入以下代码，并根据自己的实际情况修改下表中的三个配置项：

修改	内容
voc_data_path	VOC 所在路径，文件名要与实际文件名对应
yolo_data_path	YOLO 存入路径
class_mapping	类别名称必须与 VOC 标注（XML 中的 <name>）完全一致，类别 ID 从 0 开始连续编号

运行

import os
import shutil
import random
import xml.etree.ElementTree as ET
from tqdm import tqdm

# ================== Configuration: dataset locations ==================
# Root of the VOC dataset; the annotation and image folders hang off it.
voc_data_path = r'F:\yolov\yolov5-mask-42-master\images\DataSet-VOC'
voc_annotations_path, voc_images_path = (
    os.path.join(voc_data_path, sub_dir)
    for sub_dir in ('Annotations', 'JPEGImages')
)

# Root of the YOLO output; images and labels are staged here before the split.
yolo_data_path = r'F:\yolov\yolov5-mask-42-master\images\DataSet-YOLO'
yolo_images_path, yolo_labels_path = (
    os.path.join(yolo_data_path, sub_dir)
    for sub_dir in ('images', 'labels')
)

# Class name -> class id; ids are the positions in the tuple, starting at 0.
class_mapping = {
    class_name: class_id
    for class_id, class_name in enumerate(('with_mask', 'without_mask'))
}

# ================== 转换函数 ==================
def convert_voc_to_yolo(voc_annotation_file, yolo_label_file):
    """Convert one PASCAL VOC XML annotation into a YOLO label file.

    Every ``<object>`` whose ``<name>`` is a key of the module-level
    ``class_mapping`` becomes one line ``class_id x_center y_center w h``;
    the four box values are normalised by the image size read from ``<size>``.
    Objects with an unknown class are reported and skipped.

    Boxes are clipped to the image rectangle before normalisation so the
    written values always lie in [0, 1]. Boxes with no area left after
    clipping are reported and skipped.

    Args:
        voc_annotation_file: Path of the VOC ``.xml`` file to read.
        yolo_label_file: Path of the YOLO ``.txt`` file to create or overwrite.

    Raises:
        ValueError: If a box has to be normalised but the annotation declares
            a non-positive image width or height.
    """
    def _clip(value, upper):
        # Keep a pixel coordinate inside [0, upper].
        return min(max(value, 0.0), upper)

    root = ET.parse(voc_annotation_file).getroot()

    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)

    # Collect the lines first so a failure never leaves a half-written label.
    lines = []
    for obj in root.findall('object'):
        cls = (obj.find('name').text or '').strip()
        if cls not in class_mapping:
            print(f"未知类别: {cls}，跳过...")
            continue
        if width <= 0 or height <= 0:
            raise ValueError(
                f"invalid image size {width}x{height} in {voc_annotation_file}"
            )
        cls_id = class_mapping[cls]

        xmlbox = obj.find('bndbox')
        # Sorting also repairs annotations whose min/max corners are swapped.
        xmin, xmax = sorted((_clip(float(xmlbox.find('xmin').text), width),
                             _clip(float(xmlbox.find('xmax').text), width)))
        ymin, ymax = sorted((_clip(float(xmlbox.find('ymin').text), height),
                             _clip(float(xmlbox.find('ymax').text), height)))
        if xmax <= xmin or ymax <= ymin:
            print(f"无效边界框，跳过: {voc_annotation_file}")
            continue

        x_center = (xmin + xmax) / 2.0 / width
        y_center = (ymin + ymax) / 2.0 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height
        lines.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")

    with open(yolo_label_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

# ================== Prepare the staging folders ==================
for staging_dir in (yolo_images_path, yolo_labels_path):
    os.makedirs(staging_dir, exist_ok=True)

# ================== VOC -> YOLO conversion ==================
print("开始VOC到YOLO格式转换...")
xml_files = [name for name in os.listdir(voc_annotations_path) if name.endswith('.xml')]
for xml_file in tqdm(xml_files):
    voc_annotation_file = os.path.join(voc_annotations_path, xml_file)
    image_id = os.path.splitext(xml_file)[0]

    # Extension of the first image sharing the annotation's stem ('.png' is
    # tried before '.jpg'); None when no such image exists.
    ext = next(
        (candidate for candidate in ('.png', '.jpg')
         if os.path.exists(os.path.join(voc_images_path, f"{image_id}{candidate}"))),
        None,
    )
    if ext is None:
        print(f"图像未找到: {image_id}")
        continue

    # Stage the image next to the label that is about to be generated.
    voc_image_file = os.path.join(voc_images_path, f"{image_id}{ext}")
    yolo_image_file = os.path.join(yolo_images_path, f"{image_id}{ext}")
    shutil.copy(voc_image_file, yolo_image_file)

    yolo_label_file = os.path.join(yolo_labels_path, f"{image_id}.txt")
    convert_voc_to_yolo(voc_annotation_file, yolo_label_file)

print("VOC到YOLO格式转换完成！")

# ================== Split into train / val / test ==================
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1

# Each subset gets its own images/ and labels/ folder.
subsets = ['train', 'val', 'test']
for subset in subsets:
    for kind in ('images', 'labels'):
        os.makedirs(os.path.join(yolo_data_path, subset, kind), exist_ok=True)

# Stems of all staged images (name minus the 4-character extension), shuffled.
image_files = [
    entry[:-4]
    for entry in os.listdir(yolo_images_path)
    if entry.endswith(('.png', '.jpg'))
]
random.shuffle(image_files)

total_images = len(image_files)
train_count = int(train_ratio * total_images)
val_count = int(val_ratio * total_images)
# The test subset takes whatever the two truncated counts leave over.
test_count = total_images - train_count - val_count

val_end = train_count + val_count
train_files = image_files[:train_count]
val_files = image_files[train_count:val_end]
test_files = image_files[val_end:]

# 移动文件
def move_files(files, src_img_dir, src_lab_dir, dst_img_dir, dst_lab_dir,
               image_exts=('.png', '.jpg')):
    """Move image/label pairs from the staging folders into a subset folder.

    For each stem in ``files`` the image is looked up under every extension
    in ``image_exts`` (first match wins), so images of any staged type are
    moved, not only ``.png`` ones. A pair is moved only when both the image
    and its ``.txt`` label exist; otherwise the stem is left untouched.

    Args:
        files: Iterable of file stems (names without extension).
        src_img_dir: Folder currently holding the images.
        src_lab_dir: Folder currently holding the ``.txt`` labels.
        dst_img_dir: Existing folder that receives the images.
        dst_lab_dir: Existing folder that receives the labels.
        image_exts: Image extensions to try, in priority order.
    """
    for file in files:
        src_lab = os.path.join(src_lab_dir, f"{file}.txt")
        if not os.path.exists(src_lab):
            continue
        for ext in image_exts:
            src_img = os.path.join(src_img_dir, f"{file}{ext}")
            if os.path.exists(src_img):
                shutil.move(src_img, os.path.join(dst_img_dir, f"{file}{ext}"))
                shutil.move(src_lab, os.path.join(dst_lab_dir, f"{file}.txt"))
                break

# Hand each subset's stems to move_files, targeting <subset>/images and
# <subset>/labels under the YOLO root (train first, then val, then test).
for subset_name, subset_files in (('train', train_files),
                                  ('val', val_files),
                                  ('test', test_files)):
    move_files(subset_files, yolo_images_path, yolo_labels_path,
               os.path.join(yolo_data_path, subset_name, 'images'),
               os.path.join(yolo_data_path, subset_name, 'labels'))

print("数据集划分完成！")

# ================== Write dataset.yaml ==================
# List the class names in ascending class-id order: a name's position in
# `names` is its id, so relying on dict insertion order would mislabel classes
# whenever class_mapping is not written in id order.
ordered_names = sorted(class_mapping, key=class_mapping.get)
yaml_content = f"""train: ./train/images
val: ./val/images

nc: {len(class_mapping)}
names: {ordered_names}
"""
# Explicit UTF-8 so non-ASCII class names do not depend on the platform's
# default text encoding.
with open(os.path.join(yolo_data_path, 'dataset.yaml'), 'w', encoding='utf-8') as f:
    f.write(yaml_content)
print("dataset.yaml 已生成！")

# ================== 注意：脚本不会删除任何文件夹 ==================
# 划分完成后，images/ 和 labels/ 两个中间目录会保留在 YOLO 输出目录下。
# 如需清理，请取消以下两行的注释（目录中尚未被移动的文件会一并删除）：
# shutil.rmtree(yolo_images_path)
# shutil.rmtree(yolo_labels_path)

运行结果（脚本还会在 DataSet-YOLO/ 下保留 images/ 和 labels/ 两个中间目录，下面的目录树未列出）

DataSet-YOLO/
├── train/
│   ├── images/
│   └── labels/
├── val/
│   ├── images/
│   └── labels/
├── test/
│   ├── images/
│   └── labels/
└── dataset.yaml

在这里插入图片描述

3. 代码解析

第一步：导入必要的库

import os          # 操作文件路径
import shutil      # 复制/移动文件
import random      # 随机打乱数据
import xml.etree.ElementTree as ET  # 解析 XML 文件
from tqdm import tqdm  # 显示进度条

第二步：配置路径和类别

# Where the VOC data lives: XML annotations and images sit in two sub-folders.
voc_data_path = r'F:\...\images\DataSet-VOC'
voc_annotations_path, voc_images_path = (
    os.path.join(voc_data_path, sub_dir)
    for sub_dir in ('Annotations', 'JPEGImages')
)

# Where the YOLO output goes: staged images and generated labels.
yolo_data_path = r'F:\...\images\DataSet-YOLO'
yolo_images_path, yolo_labels_path = (
    os.path.join(yolo_data_path, sub_dir)
    for sub_dir in ('images', 'labels')
)

# Class mapping: class name used in the VOC files -> numeric YOLO id.
# Ids are the positions in the tuple, starting at 0.
class_mapping = {
    class_name: class_id
    for class_id, class_name in enumerate(('with_mask', 'without_mask'))
}

使用 os.path.join 会自动根据当前系统选择正确的分隔符，让代码跨平台兼容

Windows 用 \
Linux/macOS 用 /

第三步：核心转换函数

def convert_voc_to_yolo(voc_annotation_file, yolo_label_file):
    """Convert one PASCAL VOC XML annotation into a YOLO label file.

    Every ``<object>`` whose ``<name>`` is a key of the module-level
    ``class_mapping`` becomes one line ``class_id x_center y_center w h``;
    objects with an unknown class are skipped. Boxes are clipped to the
    image rectangle so the written values always lie in [0, 1], and boxes
    with no area left after clipping are reported and skipped.

    Args:
        voc_annotation_file: Path of the VOC ``.xml`` file to read.
        yolo_label_file: Path of the YOLO ``.txt`` file to create or overwrite.

    Raises:
        ValueError: If a box has to be normalised but the annotation declares
            a non-positive image width or height.
    """
    def _clip(value, upper):
        # Keep a pixel coordinate inside [0, upper].
        return min(max(value, 0.0), upper)

    # 1. Read the XML file
    root = ET.parse(voc_annotation_file).getroot()

    # 2. Get the image width and height
    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)

    # 3. Walk over every object; lines are collected first so a failure
    #    never leaves a half-written label file
    lines = []
    for obj in root.findall('object'):
        cls = (obj.find('name').text or '').strip()
        if cls not in class_mapping:
            continue  # skip unknown classes
        if width <= 0 or height <= 0:
            raise ValueError(
                f"invalid image size {width}x{height} in {voc_annotation_file}"
            )
        cls_id = class_mapping[cls]

        # 4. Read the box corners, clipped to the image; sorting also
        #    repairs annotations whose min/max corners are swapped
        box = obj.find('bndbox')
        xmin, xmax = sorted((_clip(float(box.find('xmin').text), width),
                             _clip(float(box.find('xmax').text), width)))
        ymin, ymax = sorted((_clip(float(box.find('ymin').text), height),
                             _clip(float(box.find('ymax').text), height)))
        if xmax <= xmin or ymax <= ymin:
            print(f"无效边界框，跳过: {voc_annotation_file}")
            continue

        # 5. Convert to YOLO format (normalised centre + width/height)
        x_center = (xmin + xmax) / 2.0 / width
        y_center = (ymin + ymax) / 2.0 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        # 6. One line per object; ':.6f' keeps six decimal places
        lines.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")

    with open(yolo_label_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

关键公式：

中心点：x_center = (xmin + xmax) / 2 / image_width，y_center = (ymin + ymax) / 2 / image_height
宽高：w = (xmax - xmin) / image_width，h = (ymax - ymin) / image_height

第四步：执行转换 + 复制图片

# Create the output folders
os.makedirs(yolo_images_path, exist_ok=True)
os.makedirs(yolo_labels_path, exist_ok=True)

# Walk over every XML file
for xml_file in tqdm(xml_files):
    # Find the matching image (it may be .jpg or .png)
    # Copy the image into YOLO/images
    # Call convert_voc_to_yolo to generate the .txt label

第五步：划分训练集、验证集、测试集

train_ratio = 0.7  # 70% training
val_ratio = 0.2    # 20% validation
test_ratio = 0.1   # 10% test

# Shuffle all file names randomly
random.shuffle(image_files)

# Slice the shuffled list by the ratios
train_files = image_files[:train_count]
val_files = image_files[train_count:train_count + val_count]
test_files = image_files[train_count + val_count:]

# Move the files into the matching subset folders

第六步：生成 dataset.yaml

这是 YOLOv5 训练时必需的配置文件：

train: ./train/images
val: ./val/images

nc: 2
names: ['with_mask', 'without_mask']

nc: number of classes（类别数）
names: 类别名称列表（列表中第 i 个名称对应类别 ID i，因此顺序必须与 class_mapping 中的 ID 一致）

您可能感兴趣的与本文相关的镜像

Yolo-v5

Yolo

YOLO（You Only Look Once）是一种流行的物体检测和图像分割模型，由华盛顿大学的Joseph Redmon 和Ali Farhadi 开发。 YOLO 于2015 年推出，因其高速和高精度而广受欢迎