ISAT类别编辑批处理代码（利用ISAT编辑生成的json文件，对其合并、删除、保留）

d_d_c_c

已于 2025-06-20 18:40:22 修改

阅读量314

点赞数 4

CC 4.0 BY-SA版权

于 2025-06-19 21:55:24 首次发布

本文链接：https://blog.youkuaiyun.com/d_d_c_c/article/details/148776312

深度学习专栏收录该内容

6 篇文章

订阅专栏

利用半自动标注工具（ISAT）勾绘类别后，如果想对标签做进一步筛选（合并、删除、保留），可参考如下示例代码（处理顺序：labels_to_delete > merge_dict > labels_to_keep，即先按原标签删除，再合并，最后按保留列表过滤）：

import os
import json
import shutil
from collections import defaultdict

# ------------------------- Configuration ------------------------- #
# Directory that is scanned for ISAT JSON files (other files are copied).
input_dir = r"D:\plant_seg_datasets\20250524\test_img"
# Results go to a sibling directory carrying the "_edited1" suffix.
output_dir = f"{input_dir}_edited1"

# Label merging rules, written as {old_label: new_label}.
# Example entry:  "old_label": "new_label",
merge_dict = {}

# Labels whose objects are removed, e.g. {"trash", "ignore"}.
labels_to_delete = set()

# Labels whose objects are kept; set to None to keep every label.
labels_to_keep = {
    "hei_shui_yin_lian_hua",
    "duo_bei_yin_lian_hua",
    "ding_bing_hu",
    "dan_hua_ji",
    "dong_bei_yan_hu_suo",
    "tu_kui",
    "fan_e_yin_lian_hua",
}
# ----------------------------------------------------------------- #

def process_isat_files(input_dir, output_dir):
    """Apply the module-level label rules to every JSON file in a directory.

    Each ``*.json`` file in ``input_dir`` is loaded and its ``"objects"``
    list is rewritten according to the module-level ``labels_to_delete``,
    ``merge_dict`` and ``labels_to_keep`` settings, applied per object in
    that order:

    1. objects whose original category is in ``labels_to_delete`` are dropped;
    2. categories found in ``merge_dict`` are renamed to the mapped label;
    3. if ``labels_to_keep`` is not None, objects whose (possibly renamed)
       category is not in it are dropped.

    Files that end up changed are written to ``output_dir``; every other
    regular file (unchanged JSON, JSON without a usable ``"objects"`` list,
    non-JSON files) is copied over as-is. JSON files that cannot be read or
    parsed are reported and not copied.

    Args:
        input_dir: Directory to scan (not recursive).
        output_dir: Directory receiving the results; created if missing.

    Returns:
        A tuple ``(processed_files, skipped_files, total_json_files, stats,
        found_all_labels)`` where ``processed_files`` counts rewritten JSON
        files, ``skipped_files`` counts JSON files copied unchanged,
        ``total_json_files`` counts all JSON files seen (including
        unreadable ones), ``stats`` maps ``filename -> {"deleted" |
        "merged" | "filtered": count}`` and ``found_all_labels`` is the set
        of original categories encountered.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Per-file operation counters: stats[filename][operation] -> count.
    stats = defaultdict(lambda: defaultdict(int))
    processed_files = 0
    total_json_files = 0
    skipped_files = 0
    found_all_labels = set()  # every original label seen in any file

    # Sorted so that log output and per-file statistics come out in a
    # stable order regardless of the file system.
    for filename in sorted(os.listdir(input_dir)):
        src_path = os.path.join(input_dir, filename)
        dst_path = os.path.join(output_dir, filename)

        # Directories (and other non-regular entries) cannot be passed to
        # shutil.copy2/open; skip them instead of aborting the whole batch.
        if not os.path.isfile(src_path):
            print(f"⚠️ 跳过非文件条目: {filename}")
            continue

        if not filename.endswith(".json"):
            # Non-JSON files (e.g. images) are copied once and never rewritten.
            if not os.path.exists(dst_path):
                shutil.copy2(src_path, dst_path)
            continue

        total_json_files += 1

        try:
            with open(src_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and bad encodings.
            print(f"⚠️ 错误: 无法读取 {filename} - {str(e)}")
            continue

        # A usable file has a dict at the root with a list under "objects".
        objects = data.get("objects") if isinstance(data, dict) else None
        if not isinstance(objects, list):
            print(f"  ⚠️ 警告: {filename} 没有'objects'字段，跳过处理")
            shutil.copy2(src_path, dst_path)
            skipped_files += 1
            continue

        new_objects = []
        image_modified = False
        found_labels = set()

        print(f"\n📄 处理文件: {filename}")

        for obj in objects:
            # Entries without a category cannot be matched against any rule;
            # keep them untouched rather than crash or silently drop data.
            if not isinstance(obj, dict) or "category" not in obj:
                print("  ⚠️ 警告: 对象缺少'category'字段，原样保留")
                new_objects.append(obj)
                continue

            original_label = obj["category"]
            current_label = original_label
            found_labels.add(original_label)
            found_all_labels.add(original_label)

            # 1. Delete: matched against the original label, before merging.
            if original_label in labels_to_delete:
                print(f"  × 删除标签: {original_label}")
                stats[filename]["deleted"] += 1
                image_modified = True
                continue

            # 2. Merge: rename the label according to merge_dict.
            if original_label in merge_dict:
                new_label = merge_dict[original_label]
                print(f"  → 合并标签: {original_label} → {new_label}")
                current_label = new_label
                stats[filename]["merged"] += 1
                image_modified = True

            # 3. Keep: the whitelist is checked against the merged label.
            if labels_to_keep is not None:
                if current_label not in labels_to_keep:
                    print(f"  ✗ 过滤标签: {current_label} (不在保留列表中)")
                    stats[filename]["filtered"] += 1
                    image_modified = True
                    continue
                print(f"  ✓ 保留标签: {current_label}")
            else:
                print(f"  ✓ 保留标签: {current_label} (无过滤规则)")

            # Store the (possibly renamed) label and keep the object.
            if current_label != original_label:
                obj["category"] = current_label
            new_objects.append(obj)

        # str() + sorted(): tolerate non-string labels and print them in a
        # reproducible order.
        print(f"  🔍 文件中存在的标签: {', '.join(sorted(map(str, found_labels)))}")

        if image_modified:
            data["objects"] = new_objects
            with open(dst_path, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            processed_files += 1
            print(f"  ✅ 文件已修改")
        else:
            # Always overwrite: an earlier run with different rules may have
            # left an edited copy that no longer matches the configuration.
            shutil.copy2(src_path, dst_path)
            skipped_files += 1
            print(f"  ⏩ 文件未修改（已复制）")

    return processed_files, skipped_files, total_json_files, stats, found_all_labels


def print_summary(processed_files, skipped_files, total_json_files, stats, found_all_labels):
    """Print a human-readable report of a ``process_isat_files`` run.

    The report lists the file counters, every label that was found, the
    per-file and overall operation counts, and a sanity check of the
    module-level ``labels_to_keep`` against the labels actually available.
    The output directory (module-level ``output_dir``) is printed last.

    Args:
        processed_files: Number of JSON files that were rewritten.
        skipped_files: Number of JSON files copied without changes.
        total_json_files: Number of JSON files encountered.
        stats: Mapping ``filename -> {operation: count}`` with the operation
            keys ``"deleted"``, ``"merged"`` and ``"filtered"``.
        found_all_labels: Set of original labels seen in the data.

    Returns:
        None. Everything is written to standard output.
    """
    # Display names for the operation keys; unknown keys are shown verbatim.
    op_names = {
        "deleted": "删除",
        "merged": "合并",
        "filtered": "过滤",
    }

    print("\n" + "=" * 50)
    print(f"✅ 处理完成! 共处理 {total_json_files} 个JSON文件")
    print(f"  - 修改文件: {processed_files}")
    print(f"  - 未修改文件: {skipped_files}")
    print("=" * 50)

    # All labels discovered in the data; key=str keeps sorting safe even if
    # a file contained a non-string category.
    print("\n🔎 在所有文件中发现的标签:")
    for label in sorted(found_all_labels, key=str):
        print(f"  - {label}")

    total_counts = defaultdict(int)

    for filename, counts in stats.items():
        print(f"\n📄 文件: {filename}")
        for op, count in counts.items():
            print(f"  - {op_names.get(op, op)}: {count}个对象")
            total_counts[op] += count

    if total_counts:
        print("\n📊 总计操作:")
        for op, count in total_counts.items():
            print(f"  - {op_names.get(op, op)}: {count}个对象")
    else:
        print("\nℹ️ 没有对象被修改")

    # Check the keep-list against the data.
    print("\n🔧 配置检查:")
    if labels_to_keep:
        # found_all_labels only holds original labels, so a keep label that
        # exists solely as the target of a merge rule must also count as
        # present; otherwise it would be reported as missing by mistake.
        available = set(found_all_labels)
        available.update(
            merge_dict[label] for label in found_all_labels if label in merge_dict
        )
        missing_labels = sorted(
            (label for label in labels_to_keep if label not in available),
            key=str,
        )
        if missing_labels:
            print(f"  ⚠️ 警告: 配置中指定的保留标签未在数据中找到:")
            for label in missing_labels:
                print(f"    - {label}")
        else:
            print("  ✓ 所有配置的保留标签都在数据中找到")

    print(f"\n📁 输出目录: {output_dir}")


if __name__ == "__main__":
    banner = "=" * 50

    # Opening banner showing which directory is about to be processed.
    print(banner)
    print("🛠️ 开始处理ISAT数据集")
    print(f"📂 输入目录: {input_dir}")
    print(banner + "\n")

    # Echo the active rules; an empty/None setting shows its fallback text.
    print("🔧 配置信息:")
    for caption, value, fallback in (
        ("合并规则", merge_dict, "无"),
        ("删除标签", labels_to_delete, "无"),
        ("保留标签", labels_to_keep, "所有标签"),
    ):
        print(f"  - {caption}: {value or fallback}")
    print()

    # Run the batch edit, then report what happened.
    (
        processed_files,
        skipped_files,
        total_json_files,
        stats,
        found_all_labels,
    ) = process_isat_files(input_dir, output_dir)
    print_summary(processed_files, skipped_files, total_json_files, stats, found_all_labels)