人体姿态估计——从coco数据集中提取人的图片和对应json，并建立自己的小数据集

最新推荐文章于 2025-04-28 14:11:19 发布

Coco091

最新推荐文章于 2025-04-28 14:11:19 发布

阅读量313

点赞数 6

CC 4.0 BY-SA版权

文章标签： json 人工智能 python 深度学习

本文链接：https://blog.youkuaiyun.com/Coco091/article/details/147568556

注意⚠️：请根据实际情况修改代码中的路径

一、根据类别 id（COCO 中 person 的 category_id 为 1）提取人的标注并生成新的 json

import json
from tqdm import tqdm

# Extract the annotations of one category (people by default) from a COCO file.
def filter_coco_data(input_json, output_json, target_category_id=1):
    """Write a COCO JSON that keeps only one category's annotations.

    Every annotation whose ``category_id`` equals ``target_category_id`` is
    kept, together with the images those annotations reference. ``info`` and
    ``licenses`` are copied when present (defaulting to ``{}`` and ``[]``),
    and ``categories`` is copied unchanged. A short before/after summary is
    printed once the file has been written.

    Args:
        input_json: Path of the source COCO annotation file.
        output_json: Path of the JSON file to write. Missing parent
            directories are created.
        target_category_id: Category id to keep (default 1).

    Raises:
        KeyError: If the input lacks ``annotations``, ``images`` or
            ``categories``.
    """
    import os

    # The progress bar is cosmetic, so fall back to plain iteration when tqdm
    # cannot be imported instead of failing the whole extraction.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable, **_kwargs):
            return iterable

    # Explicit UTF-8: without it the decoding follows the platform locale.
    with open(input_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Step 1: keep matching annotations and remember which images they use.
    valid_image_ids = set()
    filtered_annotations = []
    for ann in progress(data['annotations'], desc="过滤标注"):
        if ann['category_id'] == target_category_id:
            filtered_annotations.append(ann)
            valid_image_ids.add(ann['image_id'])

    # Step 2: keep only the images referenced by a surviving annotation.
    filtered_images = [
        img for img in data['images']
        if img['id'] in valid_image_ids
    ]

    new_data = {
        "info": data.get('info', {}),
        "licenses": data.get('licenses', []),
        "categories": data['categories'],
        "images": filtered_images,
        "annotations": filtered_annotations,
    }

    # Create the destination folder first so a fresh dataset layout works.
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(new_data, f, indent=2)

    print(f"原始数据：{len(data['images'])} 图片, {len(data['annotations'])} 标注")
    print(f"过滤后：{len(filtered_images)} 图片, {len(filtered_annotations)} 标注")



if __name__ == "__main__":
    # Example run: adjust both paths to your own environment.
    source_annotations = "D:\\HRNet\\coco2017\\annotations\\person_keypoints_train2017.json"
    person_annotations = "D:\\HRNet\\coco2025\\annotations\\train.json"

    # Category id 1 is passed explicitly, matching the function's default.
    filter_coco_data(source_annotations, person_annotations, target_category_id=1)

二、根据新json提取对应的图片

import json
import os
import shutil

# Copy the images listed in a COCO JSON file into a separate directory.
def extract_coco_images(json_path, src_img_dir, dst_dir):
    """Copy every image referenced by a COCO JSON file into ``dst_dir``.

    Each entry of the JSON's ``images`` list is looked up by ``file_name``
    inside ``src_img_dir`` and copied under the same name into ``dst_dir``.
    Files missing from the source directory are reported and skipped. The
    final summary reports how many files were actually copied.

    Args:
        json_path: Path of the COCO-format JSON file.
        src_img_dir: Directory that holds the original images.
        dst_dir: Directory to copy into; created if it does not exist.
    """
    os.makedirs(dst_dir, exist_ok=True)

    # Explicit UTF-8: without it the decoding follows the platform locale.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    copied = 0
    for img_info in data['images']:
        src_path = os.path.join(src_img_dir, img_info['file_name'])
        dst_path = os.path.join(dst_dir, img_info['file_name'])

        # A missing image is reported but must not abort the whole run.
        if not os.path.exists(src_path):
            print(f"警告：文件 {src_path} 不存在，已跳过")
            continue

        shutil.copy(src_path, dst_path)
        copied += 1

    # Report the real number of copies, not the number of JSON records.
    print(f"完成！共复制 {copied} 张图片到 {dst_dir}")


# Usage example
if __name__ == "__main__":
    # Adjust the three paths below to your own environment.
    # COCO JSON that lists the images to copy.
    annotation_file = "D:\\HRNet\\dataset\\coco2025\\annotations\\val2025.json"
    # Directory holding the original images.
    image_source_dir = "D:\\HRNet\\coco2025\\val2025"
    # Directory that receives the copied images.
    image_target_dir = "D:\\HRNet\\dataset\\coco2025\\val2025"

    extract_coco_images(
        json_path=annotation_file,
        src_img_dir=image_source_dir,
        dst_dir=image_target_dir,
    )

三、从新 json 中取前 1000 张图片及其标注，生成一个小 json 作为小数据集；之后使用第二节的代码提取这 1000 张图片即可

import json

# Take a fixed number of images (with their annotations) as a small dataset.
def filter_coco_dataset(input_json_path, output_json_path, num_images=1000):
    """Write a COCO JSON limited to the first ``num_images`` images.

    The first ``num_images`` entries of ``images`` are kept in their original
    order, along with every annotation whose ``image_id`` belongs to one of
    them. ``info``, ``licenses`` and ``categories`` are copied when present.

    Args:
        input_json_path: Path of the source COCO annotation file.
        output_json_path: Path of the JSON file to write. Missing parent
            directories are created.
        num_images: Number of leading images to keep (default 1000). If the
            file holds fewer images, all of them are kept.

    Raises:
        ValueError: If ``num_images`` is negative.
    """
    import os

    # A negative count would slice from the end and silently drop the last
    # images instead of limiting the dataset, so reject it up front.
    if num_images is not None and num_images < 0:
        raise ValueError(f"num_images must be non-negative, got {num_images}")

    # Explicit UTF-8: without it the decoding follows the platform locale.
    with open(input_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Keep the leading images; change this line for another selection rule.
    selected_images = data['images'][:num_images]
    selected_image_ids = {img['id'] for img in selected_images}

    selected_annotations = [
        ann for ann in data['annotations']
        if ann['image_id'] in selected_image_ids
    ]

    filtered_data = {
        "info": data.get("info", {}),
        "licenses": data.get("licenses", []),
        "categories": data.get("categories", []),
        "images": selected_images,
        "annotations": selected_annotations,
    }

    # Create the destination folder first so a fresh dataset layout works.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(filtered_data, f, indent=2)


# Usage example
# Guarded so that importing this module does not trigger the example run;
# executing the file as a script behaves as before.
if __name__ == "__main__":
    # Adjust both paths to your own environment.
    filter_coco_dataset(
        input_json_path='D:\\HRNet\\coco2025\\annotations\\val2025.json',
        output_json_path='D:\\HRNet\\dataset\\coco2025\\annotations\\val2025.json',
        num_images=1000
    )