目标检测-数据准备系列(一)--voc2coco_for i, cls in enumerate(classes, 1)-CSDN博客

本文链接：https://blog.csdn.net/fengxinzioo/article/details/103102139

本文介绍了一种将VOC格式的标注数据转换为COCO格式的方法，详细解析了Python脚本如何从XML文件中提取bounding box信息，并将其转化为COCO所需的JSON格式，适用于图像识别和目标检测任务的数据预处理。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

直接贴源码：

# -*- coding=utf-8 -*-
import json
import os
import cv2
import xml.etree.ElementTree as ET
import shutil

# Extract bounding boxes from a VOC XML file as [[x_min, y_min, x_max, y_max, name], ...]
def parse_xml(xml_path):
    """Read the object bounding boxes from a Pascal VOC annotation file.

    Args:
        xml_path: Path (or open file object) of the VOC XML annotation.

    Returns:
        A list with one ``[x_min, y_min, x_max, y_max, name]`` entry per
        ``<object>`` element, in document order. Coordinates are ints;
        fractional values are truncated toward zero.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        AttributeError: If an object lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate tags.
        ValueError: If a coordinate is not numeric.
    """
    root = ET.parse(xml_path).getroot()
    coords = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        box = obj.find('bndbox')
        # Look each coordinate up by tag name rather than by child position:
        # positional access silently returns wrong boxes when the children
        # are stored in a different order. Going through float() accepts
        # values such as "40.0" that int() alone rejects.
        x_min, y_min, x_max, y_max = [
            int(float(box.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        coords.append([x_min, y_min, x_max, y_max, name])
    return coords

def convert(root_path, source_xml_root_path, target_xml_root_path, phase='train', split=80000,
            supercategory='beverage'):
    """Convert a Pascal VOC style dataset into a COCO ``instances`` JSON file.

    Args:
        root_path: Dataset root. It must contain ``JPEGImages`` (the image
            folder) and ``classes.txt`` (whitespace-separated class names).
        source_xml_root_path: Directory holding the VOC XML annotations; each
            image is matched to ``<image name without extension>.xml``.
        target_xml_root_path: Output root. The result is written to
            ``<target_xml_root_path>/annotations/instances_<phase>2014.json``;
            the ``annotations`` folder is created when missing.
        phase: ``'train'`` keeps the images whose index in the sorted file
            list is ``<= split``; ``'val'`` keeps those with index
            ``> split``. Any other value keeps every image.
        split: Index (in the sorted file list) of the last training image.
        supercategory: Value stored in the ``supercategory`` field of every
            category entry.

    Raises:
        ValueError: If an annotation uses a class name that is not listed in
            ``classes.txt``.
    """
    dataset = {'categories': [], 'images': [], 'annotations': []}

    # Load the class names.
    with open(os.path.join(root_path, 'classes.txt')) as f:
        classes = f.read().strip().split()

    # Category ids start at 1. The lookup table avoids a linear search per
    # box; setdefault keeps the first id when a name is listed twice, which
    # is what list.index() would have returned.
    class_ids = {}
    for cls_id, cls in enumerate(classes, 1):
        dataset['categories'].append({'id': cls_id, 'name': cls, 'supercategory': supercategory})
        class_ids.setdefault(cls, cls_id)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # train/val partition reproducible across runs and machines.
    image_dir = os.path.join(root_path, 'JPEGImages')
    pics = sorted(os.listdir(image_dir))

    # Select the requested part of the dataset.
    if phase == 'train':
        pics = [pic for idx, pic in enumerate(pics) if idx <= split]
    elif phase == 'val':
        pics = [pic for idx, pic in enumerate(pics) if idx > split]

    print('---------------- start convert ---------------')
    bnd_id = 1  # annotation ids start at 1
    for image_id, pic in enumerate(pics):
        # splitext handles extensions of any length (e.g. ".jpeg").
        stem = os.path.splitext(pic)[0]
        xml_path = os.path.join(source_xml_root_path, stem + '.xml')
        pic_path = os.path.join(image_dir, pic)

        # Read the image with OpenCV to obtain its width and height.
        # cv2.imread returns None instead of raising when the file cannot be
        # decoded (stray non-image file, truncated download, ...).
        im = cv2.imread(pic_path)
        if im is None:
            print(pic + ' is not a readable image, skipped~')
            continue
        img_height, img_width = im.shape[:2]

        # The image is registered even when it has no usable annotation file.
        dataset['images'].append({'file_name': pic,
                                  'id': image_id,
                                  'width': img_width,
                                  'height': img_height})

        if not os.path.exists(xml_path):
            print(stem + '.xml not exists~')
            continue
        try:
            coords = parse_xml(xml_path)
        except Exception as err:
            # Best effort per file: one malformed annotation must not abort
            # the whole conversion. Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            print('{}.xml could not be parsed: {}'.format(stem, err))
            continue

        for coord in coords:
            # The top-left corner is shifted by one pixel (presumably because
            # VOC coordinates are 1-based) and clamped at 0.
            x1 = max(int(coord[0]) - 1, 0)
            y1 = max(int(coord[1]) - 1, 0)
            x2 = int(coord[2])
            y2 = int(coord[3])
            # Explicit check instead of assert: asserts vanish under -O and
            # would otherwise abort the run on a single bad box.
            if x1 >= x2 or y1 >= y2:
                print('{}.xml: skip degenerate box {}'.format(stem, [x1, y1, x2, y2]))
                continue

            name = coord[4]
            if name not in class_ids:
                raise ValueError('{}.xml: class {!r} is not listed in classes.txt'.format(stem, name))

            box_width = x2 - x1
            box_height = y2 - y1
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': class_ids[name],
                'id': bnd_id,
                'image_id': image_id,
                'iscrowd': 0,
                # Rectangle mask: the four corners, clockwise from top-left.
                'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
            })
            bnd_id += 1

    # Create the output folder when needed, but never wipe it: removing it
    # would delete the JSON produced by an earlier run for another phase.
    folder = os.path.join(target_xml_root_path, 'annotations')
    if not os.path.isdir(folder):
        os.makedirs(folder)
    json_name = os.path.join(folder, 'instances_{}2014.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)

if __name__ == '__main__':
    # Script entry point: convert the VOC2007 folder next to this project
    # into ./data_coco, using convert()'s default phase and split.
    voc_root = '../VOC2007/'
    voc_annotations = '../VOC2007/Annotations'
    coco_root = './data_coco'
    convert(
        root_path=voc_root,
        source_xml_root_path=voc_annotations,
        target_xml_root_path=coco_root,
    )

源自：