在利用半自动标注工具勾绘类别之后,如果想对这些类别进行筛选处理,可参考如下示例代码(规则应用顺序:labels_to_delete > merge_dict > labels_to_keep):
import os
import json
import shutil
from collections import defaultdict
# ====================== 🚩 Configuration ====================== #
# Directory that is scanned for ISAT JSON files and their companion files.
input_dir = r"D:\plant_seg_datasets\20250524\test_img"
# Output directory: the input path with an "_edited1" suffix appended.
output_dir = f"{input_dir}_edited1"

# Label merge rules, written as {old_label: new_label}.
merge_dict = {
    # Example:
    # "old_label": "new_label",
}

# Labels whose objects are removed.
labels_to_delete = set()  # e.g. {"trash", "ignore"}

# Labels to keep (set to None to keep every label).
labels_to_keep = {
    "hei_shui_yin_lian_hua",
    "duo_bei_yin_lian_hua",
    "ding_bing_hu",
    "dan_hua_ji",
    "dong_bei_yan_hu_suo",
    "tu_kui",
    "fan_e_yin_lian_hua",
}  # only objects carrying one of these labels survive
# ============================================================== #
# Sentinel meaning "use the module-level configuration". A dedicated object is
# needed because None is itself a meaningful value for ``keep_labels``.
_USE_MODULE_CONFIG = object()


def _refresh_copy(src, dst):
    """Copy ``src`` over ``dst`` unless both paths already name the same file."""
    # shutil.copy2 raises SameFileError when source and destination coincide
    # (e.g. the tool is pointed at the same directory for input and output).
    if os.path.exists(dst) and os.path.samefile(src, dst):
        return
    shutil.copy2(src, dst)


def _apply_label_rules(objects, merge_map, delete_labels, keep_labels):
    """Run the delete -> merge -> keep rules over one file's objects.

    Returns ``(kept_objects, counts, seen_labels)`` where ``counts`` maps
    "deleted" / "merged" / "filtered" to the number of affected objects and
    ``seen_labels`` is the set of original (pre-merge) labels encountered.
    """
    kept_objects = []
    counts = defaultdict(int)
    seen_labels = set()

    for obj in objects:
        original_label = obj["category"]
        seen_labels.add(original_label)

        # 1. Delete: matched against the original, pre-merge label.
        if original_label in delete_labels:
            print(f" × 删除标签: {original_label}")
            counts["deleted"] += 1
            continue

        # 2. Merge: rename the label when a rule exists for it.
        current_label = original_label
        if original_label in merge_map:
            current_label = merge_map[original_label]
            print(f" → 合并标签: {original_label} → {current_label}")
            counts["merged"] += 1

        # 3. Keep: matched against the merged label; None disables filtering.
        if keep_labels is None:
            print(f" ✓ 保留标签: {current_label} (无过滤规则)")
        elif current_label in keep_labels:
            print(f" ✓ 保留标签: {current_label}")
        else:
            print(f" ✗ 过滤标签: {current_label} (不在保留列表中)")
            counts["filtered"] += 1
            continue

        if current_label != original_label:
            obj["category"] = current_label
        kept_objects.append(obj)

    return kept_objects, counts, seen_labels


def process_isat_files(input_dir, output_dir, *, merge_map=None,
                       delete_labels=None, keep_labels=_USE_MODULE_CONFIG):
    """Filter the objects of every ISAT JSON file found in ``input_dir``.

    Each object's ``category`` goes through three rules, in this order:
    delete (on the original label), merge (rename), keep (on the merged
    label). Files whose object list changed are rewritten into ``output_dir``;
    every other regular file is copied there unchanged.

    Args:
        input_dir: Directory to scan (not recursive).
        output_dir: Directory receiving the results; created if missing.
        merge_map: Optional ``{old_label: new_label}`` mapping. ``None``
            (default) uses the module-level ``merge_dict``.
        delete_labels: Optional collection of labels to drop. ``None``
            (default) uses the module-level ``labels_to_delete``.
        keep_labels: Optional collection of labels to retain, or ``None`` to
            retain everything. When omitted, the module-level
            ``labels_to_keep`` is used.

    Returns:
        A tuple ``(processed_files, skipped_files, total_json_files, stats,
        found_all_labels)``: the number of rewritten JSON files, the number of
        JSON files copied unchanged, the number of JSON files seen, a
        ``{filename: {operation: count}}`` mapping that only contains files
        with at least one operation, and the set of original labels seen.
        JSON files that cannot be read are counted in ``total_json_files``
        only and are not written to ``output_dir``.
    """
    if merge_map is None:
        merge_map = merge_dict
    if delete_labels is None:
        delete_labels = labels_to_delete
    if keep_labels is _USE_MODULE_CONFIG:
        keep_labels = labels_to_keep

    os.makedirs(output_dir, exist_ok=True)

    stats = defaultdict(lambda: defaultdict(int))
    processed_files = 0
    total_json_files = 0
    skipped_files = 0
    found_all_labels = set()  # every original label met in any file

    # Sorted so that the log and the per-file statistics have a stable order.
    for filename in sorted(os.listdir(input_dir)):
        src = os.path.join(input_dir, filename)
        dst = os.path.join(output_dir, filename)

        # Sub-directories (and other non-regular entries) cannot be copied
        # with copy2 and hold no annotations at this level, so skip them.
        if not os.path.isfile(src):
            continue

        if not filename.endswith(".json"):
            # Companion files such as images never change, so an existing
            # copy is left alone.
            if not os.path.exists(dst):
                shutil.copy2(src, dst)
            continue

        total_json_files += 1
        try:
            with open(src, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"⚠️ 错误: 无法读取 {filename} - {e}")
            continue

        # A top-level value that is not a dict cannot carry an "objects" field.
        if not isinstance(data, dict) or "objects" not in data:
            print(f" ⚠️ 警告: {filename} 没有'objects'字段,跳过处理")
            _refresh_copy(src, dst)
            skipped_files += 1
            continue

        print(f"\n📄 处理文件: {filename}")
        kept_objects, counts, seen_labels = _apply_label_rules(
            data["objects"], merge_map, delete_labels, keep_labels
        )
        found_all_labels.update(seen_labels)
        print(f" 🔍 文件中存在的标签: {', '.join(sorted(seen_labels))}")

        if counts:
            # Record statistics only for files that were actually touched so
            # the summary does not list untouched files.
            for operation, count in counts.items():
                stats[filename][operation] += count
            data["objects"] = kept_objects
            with open(dst, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            processed_files += 1
            print(" ✅ 文件已修改")
        else:
            # Always refresh the copy: a file left over from an earlier run
            # with different rules would otherwise survive as stale output.
            _refresh_copy(src, dst)
            skipped_files += 1
            print(" ⏩ 文件未修改(已复制)")

    return processed_files, skipped_files, total_json_files, stats, found_all_labels
def print_summary(processed_files, skipped_files, total_json_files, stats, found_all_labels):
    """Print a report for a finished run.

    Args:
        processed_files: Number of JSON files that were rewritten.
        skipped_files: Number of JSON files that were left unmodified.
        total_json_files: Number of JSON files encountered.
        stats: Mapping ``{filename: {operation: count}}``.
        found_all_labels: Labels discovered across all files.

    Also reads the module-level ``labels_to_keep`` (to flag configured labels
    absent from the data) and ``output_dir`` (printed on the last line).
    """
    rule = "=" * 50
    # Display names for the operation keys; unknown keys are shown verbatim.
    op_names = {
        "deleted": "删除",
        "merged": "合并",
        "filtered": "过滤",
    }

    print("\n" + rule)
    print(f"✅ 处理完成! 共处理 {total_json_files} 个JSON文件")
    print(f" - 修改文件: {processed_files}")
    print(f" - 未修改文件: {skipped_files}")
    print(rule)

    # Every label that was discovered, in sorted order.
    print("\n🔎 在所有文件中发现的标签:")
    for name in sorted(found_all_labels):
        print(f" - {name}")

    # Per-file breakdown, accumulating grand totals along the way.
    grand_totals = defaultdict(int)
    for fname, per_file in stats.items():
        print(f"\n📄 文件: {fname}")
        for operation, amount in per_file.items():
            print(f" - {op_names.get(operation, operation)}: {amount}个对象")
            grand_totals[operation] += amount

    if not grand_totals:
        print("\nℹ️ 没有对象被修改")
    else:
        print("\n📊 总计操作:")
        for operation, amount in grand_totals.items():
            print(f" - {op_names.get(operation, operation)}: {amount}个对象")

    # Cross-check the keep-list against the labels that were actually seen.
    print("\n🔧 配置检查:")
    if labels_to_keep:
        absent = [name for name in labels_to_keep if name not in found_all_labels]
        if not absent:
            print(" ✓ 所有配置的保留标签都在数据中找到")
        else:
            print(" ⚠️ 警告: 配置中指定的保留标签未在数据中找到:")
            for name in absent:
                print(f" - {name}")

    print(f"\n📁 输出目录: {output_dir}")
# Script entry point: print the active configuration, run the processing pass
# over ``input_dir`` and print the summary report.
if __name__ == "__main__":
    separator = "=" * 50
    print(separator)
    print("🛠️ 开始处理ISAT数据集")
    print(f"📂 输入目录: {input_dir}")
    print(separator + "\n")

    print("🔧 配置信息:")
    print(f" - 合并规则: {merge_dict or '无'}")
    print(f" - 删除标签: {labels_to_delete or '无'}")
    # Only None disables filtering; an empty set filters out every object, so
    # it must not be reported as "all labels".
    kept_display = "所有标签" if labels_to_keep is None else labels_to_keep
    print(f" - 保留标签: {kept_display}")
    print()

    processed_files, skipped_files, total_json_files, stats, found_all_labels = (
        process_isat_files(input_dir, output_dir)
    )
    print_summary(processed_files, skipped_files, total_json_files, stats, found_all_labels)