To make it easier to edit the annotation classes in labelme-format labels and train on a custom set of classes, I wrote a script, based on PaddleSeg's built-in tools, that generates training files for a specified set of classes. The script supports three operations: merging several classes into one new class, deleting classes, and keeping only selected classes.
The full code is as follows:
import os
import json
import glob
import shutil
import numpy as np
from PIL import Image, ImageDraw
# ====== Input / output paths ======
input_dir = r"D:\plant_seg_datasets\summer\labelme"  # input folder; each labelme JSON must sit in the same directory as its corresponding image
output_dir = './data/test1'  # output folder
# ====== Class handling options ======
# The three options below are applied in this priority order: merge_classes --> delete_classes --> keep_classes
# merge_classes merges several old classes into one new class.
# Example: merge 'mei_han_cao' and 'shen_shan_mao_gen' into 'cao_lei'.
# Use case: species of the same genus or functional group are treated as a single class.
"""
Example:
merge_classes = {
    'cao_lei': ['mei_han_cao', 'shen_shan_mao_gen'],
    'hua_lei': ['lun_ye_bai_he', 'zi_hua_bian_dou_cai']
}
"""
merge_classes = {}
# delete_classes lists classes to remove outright (in the label maps they are treated as background, class 0).
# They do not take part in training and do not appear in class_names.txt.
# Use case: mislabeled classes, uninteresting miscellaneous classes, auxiliary labels (e.g. '__background__').
"""
Example:
delete_classes = ['cu_mao_qi', 'fan_e_yin_lian_hua', '__background__']
"""
delete_classes = []
# keep_classes further restricts which class names to keep (merged class names are allowed here).
# Set it to None to keep every class that was not deleted.
# Set it to a list to keep only the listed classes; all others (including merged classes not in the list) are ignored.
# Use case: focus on a subset of target classes, or train on a subset while debugging.
"""
Example:
keep_classes = ['cao_lei', 'hua_lei', 'shan_qie_zi']
# keep_classes = None  # <- set to None to keep all classes not removed by delete_classes; the background is always kept
"""
keep_classes = ['mao_guo_yin_lian_hua']
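# A worked example of how the three options interact (a hypothetical configuration
# reusing the class names from the examples above; priority: merge --> delete --> keep):
#   merge_classes  = {'cao_lei': ['mei_han_cao', 'shen_shan_mao_gen']}
#   delete_classes = ['cu_mao_qi']
#   keep_classes   = ['cao_lei']
# 'mei_han_cao' and 'shen_shan_mao_gen' are first renamed to 'cao_lei',
# 'cu_mao_qi' is mapped to the background (0), and only 'cao_lei'
# (plus '_background_') ends up in class_names.txt.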
# ====== Utility functions ======
def get_color_map_list(num_classes):
    # Generate a pseudo-color palette for indexed PNG labels.
    num_classes += 1
    color_map = num_classes * [0, 0, 0]
    for i in range(num_classes):
        j = 0
        lab = i
        while lab:
            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
            j += 1
            lab >>= 3
    return color_map[3:]
def shape2mask(img_size, points):
    label_mask = Image.fromarray(np.zeros(img_size[:2], dtype=np.uint8))
    draw = ImageDraw.Draw(label_mask)
    points_list = [tuple(point) for point in points]
    if len(points_list) > 2:
        draw.polygon(points_list, outline=1, fill=1)
    return np.array(label_mask, dtype=bool)
def shape2label(img_size, shapes, class_name_mapping, class_mapping, is_kept):
    label = np.zeros(img_size[:2], dtype=np.int32)
    for shape in shapes:
        original_label = shape['label']
        if not is_kept(original_label):
            continue
        points = shape['points']
        mapped_label = class_mapping.get(original_label, original_label)
        # skip deleted classes (those mapped to None)
        if mapped_label is None:
            continue
        class_id = class_name_mapping[mapped_label]
        mask = shape2mask(img_size[:2], points)
        label[mask] = class_id
    return label
# ====== Main program ======
def convert_labelme_to_segmentation(input_dir, output_dir):
    os.makedirs(os.path.join(output_dir, 'annotations'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)

    # === Build class_mapping ===
    class_mapping = {}
    for new_class, origins in merge_classes.items():
        for old_class in origins:
            class_mapping[old_class] = new_class
    for del_class in delete_classes:
        class_mapping[del_class] = None  # None marks a deleted class

    # === Predicate deciding whether a class is kept ===
    if keep_classes:
        keep_set = set(keep_classes)
        def is_kept(cls):
            mapped = class_mapping.get(cls, cls)
            # explicitly exclude None values
            return (mapped is not None) and (mapped in keep_set)
    else:
        def is_kept(cls):
            mapped = class_mapping.get(cls, cls)
            # explicitly exclude None values
            return mapped is not None

    # === Build the final class name list ===
    class_names = ['_background_']
    seen = set(class_names)
    for json_path in glob.glob(os.path.join(input_dir, '*.json')):
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for shape in data['shapes']:
            original = shape['label']
            if not is_kept(original):
                continue
            mapped = class_mapping.get(original, original)
            # only add valid string class names
            if mapped is not None and mapped not in seen:
                class_names.append(mapped)
                seen.add(mapped)
    # make sure every class name is a string
    class_names = [str(name) for name in class_names]

    # === Save the class name file ===
    with open(os.path.join(output_dir, 'class_names.txt'), 'w', encoding='utf-8') as f:
        f.write('\n'.join(class_names))
    print(f"✅ Number of classes: {len(class_names)}, class name file saved")
    print(f"Class list: {class_names}")

    # === Build the color map ===
    color_map = get_color_map_list(256)

    # === Process each JSON file ===
    for json_path in glob.glob(os.path.join(input_dir, '*.json')):
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        filename = os.path.splitext(os.path.basename(json_path))[0]
        image_path = os.path.join(input_dir, data['imagePath'])
        if not os.path.exists(image_path):
            print(f"⚠️ Image not found: {image_path}")
            continue
        output_image_path = os.path.join(output_dir, 'images', os.path.basename(image_path))
        output_label_path = os.path.join(output_dir, 'annotations', filename + '.png')
        shutil.copy(image_path, output_image_path)
        img = np.asarray(Image.open(image_path))
        lbl = shape2label(img.shape, data['shapes'],
                          {name: idx for idx, name in enumerate(class_names)},
                          class_mapping, is_kept)
        lbl_pil = Image.fromarray(lbl.astype(np.uint8), mode='P')
        lbl_pil.putpalette(color_map)
        lbl_pil.save(output_label_path)
        print(f"✅ Processed: {filename}")
    print(f"\n🎉 All files processed! Output directory: {output_dir}")

# ====== Run the conversion ======
convert_labelme_to_segmentation(input_dir, output_dir)
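Before moving on, it can be worth sanity-checking the generated labels. The snippet below is a small sketch of my own (not part of the script above; the annotation filename is a placeholder) that opens one generated annotation PNG and prints the class IDs it contains, which should match the indices of class_names.txt:
import numpy as np
from PIL import Image
label_path = './data/test1/annotations/example.png'  # placeholder: pick any generated label file
ids = np.unique(np.array(Image.open(label_path)))
print('class IDs present:', ids)  # e.g. [0 1] -> background plus one kept class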
After running the script above, the output directory contains class_names.txt, an annotations folder, and an images folder. You can then run PaddleSeg's built-in tool to split the data into training, validation, and test sets at the given ratios:
python tools/data/split_dataset_list.py <dataset_root> images annotations --split 0.6 0.2 0.2 --format jpg png
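For reference, split_dataset_list.py writes train.txt, val.txt, and test.txt under the dataset root, with each line pairing an image with its label file; these list files are what the PaddleSeg dataset configuration later points to. The entries should look roughly like the hypothetical lines below (filenames are placeholders):
images/IMG_0001.jpg annotations/IMG_0001.png
images/IMG_0002.jpg annotations/IMG_0002.png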