1. 提取数据集中所需标签类别的图片和标签,并另存
标签为 VOC 格式的 XML 文件:
(全部代码)
import os
import shutil
import xml.etree.ElementTree as ET
def copy_files_with_bottle_label(source_folder, out_folder, labels=("bottle",)):
    """Copy VOC annotations that contain a wanted label, together with their images.

    Walks ``<source_folder>/xml`` recursively. For every ``.xml`` file that has
    at least one ``<object>`` whose ``<name>`` is in ``labels``, the XML file is
    copied to ``<out_folder>/xml`` and ``<source_folder>/images/<stem>.jpg`` is
    copied to ``<out_folder>/images``.

    Args:
        source_folder: Folder holding the ``xml`` and ``images`` sub-folders.
        out_folder: Destination folder; its ``images`` and ``xml`` sub-folders
            are created when missing.
        labels: Object names to select. A single string or an iterable of
            strings, e.g. ``("bottle", "littlebottle")``.

    Returns:
        The number of XML/image pairs that were copied.
    """
    wanted = {labels} if isinstance(labels, str) else set(labels)

    # Create the output folders.
    out_images_folder = os.path.join(out_folder, "images")
    out_xml_folder = os.path.join(out_folder, "xml")
    os.makedirs(out_images_folder, exist_ok=True)
    os.makedirs(out_xml_folder, exist_ok=True)

    copied = 0
    # Walk the XML folder.
    for root, _dirs, files in os.walk(os.path.join(source_folder, "xml")):
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            try:
                root_xml = ET.parse(xml_path).getroot()
            except ET.ParseError as err:
                # One broken annotation should not abort the whole export.
                print("Skipping unreadable XML {}: {}".format(xml_path, err))
                continue

            # findtext() yields None for an <object> without <name> instead of
            # raising, and any() stops at the first matching object.
            has_wanted_label = any(
                obj.findtext('name') in wanted
                for obj in root_xml.findall('object')
            )
            if not has_wanted_label:
                continue

            img_name = os.path.splitext(file)[0] + '.jpg'
            img_path = os.path.join(source_folder, "images", img_name)
            # Check the image before copying anything so the output never
            # contains an annotation without its picture.
            if not os.path.isfile(img_path):
                print("Skipping {}: image not found at {}".format(xml_path, img_path))
                continue

            shutil.copy(xml_path, out_xml_folder)
            shutil.copy(img_path, out_images_folder)
            copied += 1
    return copied
# Root folder that holds the per-batch source sub-folders. The paths must not
# end with a space: once joined with a sub-folder name, the space-terminated
# segment sits in the middle of the path and Windows will not resolve it.
source_folders = r"E:\demo\source_folders"
folders_name = ["folder1_202401", "folder2_202402"]  # sub-folders to process
out_folder = r"E:\demo\out_folder"
for folder_name in folders_name:
    source_folder = os.path.join(source_folders, folder_name)
    # All sub-folders share one output folder. To keep them separate, pass
    # os.path.join(out_folder, folder_name) below rather than reassigning
    # out_folder, which would nest the outputs deeper on every iteration.
    copy_files_with_bottle_label(source_folder, out_folder)
2. 删除图片中的目标和标签(YOLO 版),并另存
用白色填充图片中目标所在的区域,并删除对应类别的标签框。
(全部代码)
import os
from PIL import Image, ImageDraw
from tqdm import tqdm
def _yolo_images_dir(label_dir):
    """Return the image directory that corresponds to a YOLO label directory."""
    if not label_dir:
        return label_dir
    parts = os.path.normpath(label_dir).split(os.sep)
    # Swap only the innermost component named exactly "labels", so that other
    # components which merely contain that word are left alone.
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == 'labels':
            parts[idx] = 'images'
            return os.sep.join(parts)
    # No exact component: fall back to the plain substring replacement.
    return label_dir.replace('labels', 'images')


def process_yolo_label(label_path, out_label_path, out_img_path, keep_labels=("7",)):
    """Paint unwanted objects white and drop their lines from a YOLO label file.

    The image is looked up next to the label file by swapping the ``labels``
    directory for ``images`` and the extension for ``.jpg``. Each label line is
    read as ``class x_center y_center width height`` with the four numbers
    normalised to the image size. Lines whose class is in ``keep_labels`` are
    kept unchanged; every other box is filled with white and its line removed.

    Args:
        label_path: Path of the source YOLO ``.txt`` label file.
        out_label_path: Where the filtered label file is written.
        out_img_path: Where the modified image is written.
        keep_labels: Class ids to keep. A single string or an iterable of
            values that are compared as strings.

    Raises:
        FileNotFoundError: If the label file or the matching image is missing.
        IndexError: If a non-blank label line has fewer than five fields.
        ValueError: If a coordinate field is not a number.
    """
    if isinstance(keep_labels, str):
        keep = {keep_labels}
    else:
        keep = {str(class_id) for class_id in keep_labels}

    # Locate the image that belongs to this label file.
    label_dir, label_file = os.path.split(label_path)
    img_name = os.path.splitext(label_file)[0] + '.jpg'
    img_path = os.path.join(_yolo_images_dir(label_dir), img_name)

    # Read the label file.
    with open(label_path, 'r') as f:
        lines = f.readlines()

    new_lines = []
    # The context manager releases the image file handle when done.
    with Image.open(img_path) as img:
        img_width, img_height = img.size
        draw = ImageDraw.Draw(img)  # one drawing context for all boxes
        for line in lines:
            parts = line.strip().split()
            if not parts:
                continue  # blank line, e.g. a trailing newline at end of file
            if parts[0] in keep:
                new_lines.append(line)
                continue
            x_center = float(parts[1])
            y_center = float(parts[2])
            width = float(parts[3])
            height = float(parts[4])
            # Convert the normalised centre/size box into pixel corners.
            left = int((x_center - width / 2) * img_width)
            top = int((y_center - height / 2) * img_height)
            right = int((x_center + width / 2) * img_width)
            bottom = int((y_center + height / 2) * img_height)
            # Fill the object area with white.
            draw.rectangle([(left, top), (right, bottom)], fill='white')
        # Save the modified image.
        img.save(out_img_path)

    # Save the filtered label file.
    with open(out_label_path, 'w') as f:
        f.writelines(new_lines)
def process_folder(source_folder, out_folder):
    """Run ``process_yolo_label`` on every label file under ``<source_folder>/labels``.

    The labels folder is walked recursively. Results are written flat into
    ``<out_folder>/images`` and ``<out_folder>/labels``, so files that share a
    name in different sub-folders overwrite each other.

    Args:
        source_folder: Folder holding the ``labels`` and ``images`` sub-folders.
        out_folder: Destination folder; its sub-folders are created when missing.
    """
    out_images_folder = os.path.join(out_folder, "images")
    out_labels_folder = os.path.join(out_folder, "labels")
    os.makedirs(out_images_folder, exist_ok=True)
    os.makedirs(out_labels_folder, exist_ok=True)

    # Collect the label files in a single walk, so the progress total counts
    # exactly the files that get processed.
    label_paths = []
    for root, _, files in os.walk(os.path.join(source_folder, "labels")):
        for file in files:
            if file.endswith('.txt'):
                label_paths.append(os.path.join(root, file))

    # Show progress with tqdm.
    with tqdm(total=len(label_paths), desc='Processing') as pbar:
        for label_path in label_paths:
            file = os.path.basename(label_path)
            out_label_path = os.path.join(out_labels_folder, file)
            out_img_path = os.path.join(out_images_folder, os.path.splitext(file)[0] + '.jpg')
            process_yolo_label(label_path, out_label_path, out_img_path)
            pbar.update(1)  # advance the progress bar
# No space before the path separator: Windows cannot resolve a folder segment
# that ends with a space once another segment follows it.
source_folder = r"E:\demo\source_folder\val"
out_folder = r"E:\demo\out_folder\del_class"
process_folder(source_folder, out_folder)
3. 删除图片中的目标和标签(XML 版)
用白色填充图片中目标所在的区域,并删除对应类别的标签框。
(全部代码)
import os
import shutil
from xml.etree import ElementTree as ET
from PIL import Image, ImageDraw
def _voc_images_dir(xml_dir):
    """Return the image directory that corresponds to a VOC XML directory."""
    if not xml_dir:
        return xml_dir
    parts = os.path.normpath(xml_dir).split(os.sep)
    # Swap only the innermost component named exactly "xml", so that other
    # components which merely contain those letters are left alone.
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == 'xml':
            parts[idx] = 'images'
            return os.sep.join(parts)
    # No exact component: fall back to the plain substring replacement.
    return xml_dir.replace('xml', 'images')


def process_voc_xml(xml_path, out_xml_path, out_img_path,
                    keep_labels=("bottle", "littlebottle")):
    """Paint unwanted objects white and drop them from a VOC XML annotation.

    A new ``<annotation>`` tree is built that carries over only ``<folder>``,
    ``<filename>``, ``<size>`` and the ``<object>`` elements whose ``<name>``
    is in ``keep_labels``; any other top-level element is not copied. The
    bounding box of every other object is filled with white in the image,
    which is looked up by swapping the ``xml`` directory for ``images`` and
    the extension for ``.jpg``.

    Args:
        xml_path: Path of the source VOC XML file.
        out_xml_path: Where the filtered XML file is written.
        out_img_path: Where the modified image is written.
        keep_labels: Object names to keep. A single string or an iterable of
            strings.

    Raises:
        FileNotFoundError: If the XML file or the matching image is missing.
    """
    keep = {keep_labels} if isinstance(keep_labels, str) else set(keep_labels)

    root = ET.parse(xml_path).getroot()

    # Build the filtered XML tree and collect the boxes to blank out.
    new_root = ET.Element("annotation")
    boxes_to_erase = []
    for elem in root:
        if elem.tag in ('folder', 'filename'):
            ET.SubElement(new_root, elem.tag).text = elem.text
        elif elem.tag == 'size':
            new_size = ET.SubElement(new_root, elem.tag)
            for size_elem in elem:
                ET.SubElement(new_size, size_elem.tag).text = size_elem.text
        elif elem.tag == 'object':
            # findtext() yields None for an <object> without <name>.
            if elem.findtext('name') in keep:
                new_root.append(elem)
                continue
            bndbox = elem.find('bndbox')
            if bndbox is None:
                continue  # no coordinates, so there is nothing to paint
            # Go through float() so values such as "12.5" are accepted.
            boxes_to_erase.append(tuple(
                int(float(bndbox.findtext(tag)))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ))

    # Open the image before writing anything: a missing image then fails
    # without leaving a filtered XML file that has no picture.
    img_name = os.path.splitext(os.path.basename(xml_path))[0] + '.jpg'
    img_path = os.path.join(_voc_images_dir(os.path.dirname(xml_path)), img_name)
    with Image.open(img_path) as img:
        draw = ImageDraw.Draw(img)
        # Fill every object that is not kept with white.
        for xmin, ymin, xmax, ymax in boxes_to_erase:
            draw.rectangle([(xmin, ymin), (xmax, ymax)], fill='white')
        # Save the modified image.
        img.save(out_img_path)

    # Save the filtered XML file.
    ET.ElementTree(new_root).write(out_xml_path)
def process_folder(source_folder, out_folder):
    """Run ``process_voc_xml`` on every ``.xml`` file under ``<source_folder>/xml``.

    The XML folder is walked recursively. Results are written flat into
    ``<out_folder>/images`` and ``<out_folder>/xml``, both created when missing.

    Args:
        source_folder: Folder holding the ``xml`` sub-folder to scan.
        out_folder: Destination folder for the processed images and XML files.
    """
    images_dst = os.path.join(out_folder, "images")
    xml_dst = os.path.join(out_folder, "xml")
    for dst in (images_dst, xml_dst):
        os.makedirs(dst, exist_ok=True)

    # Walk the XML folder.
    xml_src = os.path.join(source_folder, "xml")
    for current_dir, _, names in os.walk(xml_src):
        for name in names:
            if not name.endswith('.xml'):
                continue
            stem = os.path.splitext(name)[0]
            process_voc_xml(
                os.path.join(current_dir, name),
                os.path.join(xml_dst, name),
                os.path.join(images_dst, stem + '.jpg'),
            )
# The paths must not end with a space: once joined with a sub-folder name, the
# space-terminated segment sits in the middle of the path and Windows will not
# resolve it.
# NOTE(review): source and output folders are identical, so the original XML
# files and images are overwritten in place. Point out_folder at a separate
# directory if the source data must be preserved.
source_folder = r"E:\demo\source_folder"
out_folder = r"E:\demo\source_folder"
process_folder(source_folder, out_folder)