【目标检测】提取保存VOC数据集选定类的标注和图片

原创已于 2023-03-14 18:58:38 修改 · 1.2k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#python #深度学习 #计算机视觉

于 2022-08-19 22:53:09 首次发布

提取VOC数据集选定类别对应的xml和图片：

使用前，请先修改代码中 VOC 数据集的路径（标注和图片目录）以及提取结果的保存路径；脚本运行时会自动创建 list.txt，并把提取到的文件名（不含扩展名）逐行追加写入其中。

import os
import xml.etree.ElementTree as ET
import shutil
 
# Paths
ann_filepath = 'Annotations/'        # source directory of the VOC XML annotations
img_filepath = 'JPEGImages/'         # source directory of the VOC JPEG images
img_savepath = 'test/JPEGImages/'    # destination for the copied images
ann_savepath = 'test/Annotations/'   # destination for the filtered annotations

# os.makedirs creates missing parent directories (here 'test/') as well, which
# os.mkdir cannot do; exist_ok=True makes re-running the script harmless.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)
 
# VOC class names, kept here for reference when editing the selection below:
#   aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
#   diningtable, dog, horse, motorbike, pottedplant, sheep, sofa, train,
#   person, tvmonitor

# The classes to extract.
classes = [
    'aeroplane',
    'bicycle',
    'tvmonitor',
]
 
def save_annotation(file, src_dir=None, dst_dir=None, keep=None):
    """Write a copy of one annotation file containing only the selected classes.

    Every ``<object>`` element directly under the XML root whose ``<name>``
    text is not in ``keep`` is removed. The filtered tree is written to the
    destination directory only if at least one object was kept.

    Args:
        file: File name of the annotation (e.g. ``"000001.xml"``), relative
            to ``src_dir``.
        src_dir: Directory to read the annotation from. Defaults to the
            module-level ``ann_filepath``.
        dst_dir: Directory to write the filtered annotation to. Defaults to
            the module-level ``ann_savepath``.
        keep: Collection of class names to retain. Defaults to the
            module-level ``classes``.

    Returns:
        True if the annotation contained at least one selected object and
        was written, False otherwise (nothing is written in that case).
    """
    if src_dir is None:
        src_dir = ann_filepath
    if dst_dir is None:
        dst_dir = ann_savepath
    if keep is None:
        keep = classes

    # os.path.join works whether or not the directory ends with a slash.
    tree = ET.parse(os.path.join(src_dir, file))
    root = tree.getroot()

    kept_any = False
    # findall() returns a list, so removing children while looping is safe.
    for obj in root.findall("object"):
        # findtext() yields None for an <object> without <name>; such an
        # object is dropped instead of raising AttributeError.
        if obj.findtext("name") in keep:
            kept_any = True
        else:
            root.remove(obj)

    if not kept_any:
        return False

    tree.write(os.path.join(dst_dir, file))
    return True
 
def save_images(file, src_dir=None, dst_dir=None, list_path='list.txt'):
    """Copy the JPEG that belongs to an annotation file and log its name.

    The image name is derived by replacing the extension of ``file`` with
    ``.jpg``. After the copy, the extension-less name is appended as one line
    to ``list_path``.

    Args:
        file: Annotation file name (e.g. ``"000001.xml"``); only its stem is
            used.
        src_dir: Directory holding the source images. Defaults to the
            module-level ``img_filepath``.
        dst_dir: Directory the image is copied into. Defaults to the
            module-level ``img_savepath``.
        list_path: Text file that collects the extracted names; it is opened
            in append mode, so repeated runs add further lines.

    Returns:
        True once the image has been copied and its name recorded.

    Raises:
        OSError: Propagated from ``shutil.copy`` / ``open``, e.g. when the
            source image does not exist.
    """
    if src_dir is None:
        src_dir = img_filepath
    if dst_dir is None:
        dst_dir = img_savepath

    stem = os.path.splitext(file)[0]
    # os.path.join works whether or not src_dir ends with a slash.
    shutil.copy(os.path.join(src_dir, stem + ".jpg"), dst_dir)

    with open(list_path, 'a', encoding='utf-8') as file_txt:
        file_txt.write(stem + "\n")
    return True
 
if __name__ == '__main__':
    # os.listdir returns every directory entry in arbitrary order. Sort the
    # names so list.txt is reproducible, and skip anything that is not an XML
    # file so stray entries do not crash the XML parser.
    for f in sorted(os.listdir(ann_filepath)):
        if not f.lower().endswith('.xml'):
            continue
        # Copy the image only when the annotation holds a selected class.
        if save_annotation(f):
            save_images(f)

参考 [1]

关注博主即可阅读全文