目标检测:labelme标注的数据集转换为coco格式

1.labelme标注的json格式的文件先转为yolo格式

import json
import numpy as np
import os
import cv2

def json2yolo(path, cls_dict, types="bbox"):
    """Convert one labelme JSON annotation file into a YOLO label file.

    The label file is written next to the input, with the same base name and
    a ``.txt`` extension, and its content is also printed to stdout.

    Args:
        path: Path of the labelme ``.json`` file.
        cls_dict: Mapping from lower-cased label name to class id.
        types: ``"bbox"`` writes ``cls cx cy w h`` per shape, using the
            rectangle returned by ``cv2.boundingRect``; any other value
            writes ``cls x1 y1 x2 y2 ...`` (the shape's own points).
            All coordinates are normalised by the image width/height.

    Raises:
        KeyError: If a shape's label (lower-cased) is not in ``cls_dict``.
    """
    with open(path, 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    h = data["imageHeight"]
    w = data["imageWidth"]

    lines = []
    for shape in data["shapes"]:
        # Force a float array: with integer pixel coordinates the in-place
        # normalisation below would otherwise be truncated to 0.
        points = np.array(shape["points"], dtype=np.float64)
        if types == "bbox":
            x, y, wi, hi = cv2.boundingRect(points.reshape((-1, 1, 2)).astype(np.float32))
            cx, cy = x + wi / 2, y + hi / 2
            msg = "%.4f %.4f %.4f %.4f" % (cx / w, cy / h, wi / w, hi / h)
        else:
            points[:, 0] /= w
            points[:, 1] /= h
            # Flatten to x1 y1 x2 y2 ... and format as a YOLO polygon string.
            msg = " ".join('%.4f' % v for v in points.reshape(-1))
        label = shape['label'].lower()
        lines.append(str(cls_dict[label]) + " " + msg + "\n")

    all_lines = "".join(lines)
    print(all_lines)
    # Swap only the extension; str.replace('json', 'txt') would also rewrite
    # any "json" appearing in a directory or base name.
    filename = os.path.splitext(path)[0] + '.txt'
    with open(filename, 'w', encoding='utf-8') as fh:
        fh.write(all_lines)

# Directory holding the labelme *.json files -- change this to your own path.
# Raw string so the Windows backslashes are not parsed as escape sequences.
path = r"G:\Desttop\dataset\labelme/"
path_list = os.listdir(path)
cls_dict = {'hand': 0, 'pen': 1}  # label name -> class id; change to your own labels
# Match on the extension only, so files such as "a.json.bak" are not picked up.
path_list2 = [x for x in path_list if x.endswith(".json")]
for p in path_list2:
    json2yolo(os.path.join(path, p), cls_dict)

2.划分数据集(为方便理解,示例中保留了我自己的路径,实际使用时请替换为自己的路径)。注意:第1步生成的txt文件与json文件在同一目录下,划分前需要先把txt文件放到下面的txtpath目录,把图片放到imgpath目录。

import os, shutil, random
random.seed(0)
import numpy as np
from sklearn.model_selection import train_test_split

# Fractions of the dataset assigned to validation and test; the rest is train.
val_size = 0.2
test_size = 0.0
postfix = 'jpg'  # image file extension, without the dot
# Raw strings keep the Windows backslashes literal instead of being parsed as
# (invalid) escape sequences. Change these three paths to your own.
imgpath = r'G:\Desttop\dataset\images'
txtpath = r'G:\Desttop\dataset/txt'
split_root = r'G:\Desttop\dataset\spilt'

splits = ('train', 'val', 'test')
for kind in ('images', 'labels'):
    for name in splits:
        os.makedirs('{}/{}/{}'.format(split_root, kind, name), exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort before shuffling to
# make the shuffled split reproducible. Match on the extension only.
listdir = sorted(i for i in os.listdir(txtpath) if i.endswith('.txt'))
random.shuffle(listdir)
n_total = len(listdir)
train_end = int(n_total * (1 - val_size - test_size))
val_end = int(n_total * (1 - test_size))
train, val, test = listdir[:train_end], listdir[train_end:val_end], listdir[val_end:]
print(f'train set size:{len(train)} val set size:{len(val)} test set size:{len(test)}')

# Copy each label file and the image with the same base name into its split.
for name, files in zip(splits, (train, val, test)):
    for i in files:
        stem = os.path.splitext(i)[0]
        shutil.copy('{}/{}.{}'.format(imgpath, stem, postfix), '{}/images/{}/{}.{}'.format(split_root, name, stem, postfix))
        shutil.copy('{}/{}'.format(txtpath, i), '{}/labels/{}/{}'.format(split_root, name, i))

3.转为coco格式

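我的目录(即下面代码中的 root_dir)结构如下:

```
spilt/
├── images/
│   ├── train/
│   ├── val/
│   └── test/
├── labels/
│   ├── train/
│   ├── val/
│   └── test/
└── classes.txt
```

images 和 labels 的下一级目录均为 train、val、test,分别存放对应划分的图片和同名的 txt 标签文件。

classes.txt 中每行写一个类别名称,行的顺序即类别 id(第一行对应 id 0)。建议将文件末尾的空行删除,使光标停在最后一个类别名之后。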

import json
import shutil
from pathlib import Path
import cv2
from tqdm import tqdm


def get_datasets_path(root_dir, types=None):
    """Collect the image/label directory pairs and class names of a YOLO dataset.

    Args:
        root_dir: ``pathlib.Path`` of the dataset root; it is expected to hold
            ``images/<split>``, ``labels/<split>`` and ``classes.txt``.
        types: Iterable of split names to look for. When omitted, the
            module-level ``type_list`` is used if it is defined, otherwise
            ``("train", "val", "test")``.

    Returns:
        ``(dir_list, classes)`` where ``dir_list`` is a list of
        ``(images_dir, labels_dir)`` tuples for every split whose image
        directory exists, and ``classes`` is the list of class names read
        from ``classes.txt`` (one name per non-blank line).

    Raises:
        FileNotFoundError: If an image directory has no matching label
            directory, or if no split directory exists at all.
    """
    if types is None:
        # Keeps the original script behaviour (global ``type_list``) while
        # letting the function be imported and called on its own.
        types = globals().get("type_list", ("train", "val", "test"))

    dir_list = []
    for t in types:
        images_dir = root_dir.joinpath("images", t)
        labels_dir = root_dir.joinpath("labels", t)
        if not images_dir.exists():
            continue
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if not labels_dir.exists():
            raise FileNotFoundError(f"the path '{labels_dir}' is not exists")
        dir_list.append((images_dir, labels_dir))
    if not dir_list:
        raise FileNotFoundError("the path is empty,please check you path")

    # One class per line, so names containing spaces stay intact; blank lines
    # (including a trailing one) are ignored. "utf-8-sig" also accepts a BOM.
    with root_dir.joinpath("classes.txt").open(encoding="utf-8-sig") as f:
        classes = [line.strip() for line in f if line.strip()]

    return dir_list, classes


def move_images(origin_images_dir, filename, path):
    """Move one image file into ``<root_dir>/yolo2coco/<split>2017/``.

    Args:
        origin_images_dir: Current path of the image file.
        filename: File name to give the image in the destination folder.
        path: ``(images_dir, labels_dir)`` pair; only the name of
            ``path[0]`` is used, as the ``<split>`` part of the folder name.

    Reads the module-level ``root_dir`` to locate the destination.
    """
    dst = root_dir.joinpath("yolo2coco", f"{path[0].name}2017")
    # exist_ok avoids the separate exists() check and the race between the
    # check and the creation.
    dst.mkdir(parents=True, exist_ok=True)
    destination = dst.joinpath(filename)  # full path of the target file
    shutil.move(str(origin_images_dir), str(destination))


# Adapted from https://github.com/Weifeng-Chen/dl_scripts/blob/main/detection/yolo2coco.py
def _yolo_label_to_coco(fields, image_width, image_height, ann_id, image_id):
    """Build one COCO annotation dict from the fields of a YOLO bbox label line."""
    cls_id = int(fields[0])
    x, y, w, h = (float(v) for v in fields[1:5])

    # Normalised centre/size -> absolute corner coordinates.
    x1 = (x - w / 2) * image_width
    y1 = (y - h / 2) * image_height
    x2 = (x + w / 2) * image_width
    y2 = (y + h / 2) * image_height

    width = max(0, x2 - x1)
    height = max(0, y2 - y1)
    return {
        "area": width * height,
        # COCO bbox is top-left corner plus width and height.
        "bbox": [x1, y1, width, height],
        "category_id": cls_id,
        "id": ann_id,
        "image_id": image_id,
        "iscrowd": 0,
        "segmentation": [[x1, y1, x2, y1, x2, y2, x1, y2]]}


def yolo2coco(dir_list, classes):
    """Convert YOLO bbox labels to COCO ``instances_<split>2017.json`` files.

    For every ``(images_dir, labels_dir)`` pair in ``dir_list`` this reads
    each image to get its size, moves the image to
    ``<root_dir>/yolo2coco/<split>2017/`` via ``move_images``, converts the
    matching ``.txt`` label file, and writes
    ``<root_dir>/yolo2coco/annotations/instances_<split>2017.json``.

    Args:
        dir_list: List of ``(images_dir, labels_dir)`` ``Path`` pairs.
        classes: Class names; the list index is used as the category id.

    Reads the module-level ``root_dir``. Files that ``cv2.imread`` cannot
    decode are reported and skipped (and left in place).
    """
    for path in dir_list:
        images_dir, labels_dir = path
        dataset = {"categories": [], "annotations": [], "images": []}
        for i, cls in enumerate(classes, 0):
            dataset["categories"].append(
                {"id": i, "name": cls, "supercategory": "mark"})

        # Sorted so image ids do not depend on directory iteration order.
        indexes = sorted(images_dir.iterdir())
        id_count = 0
        bar = tqdm(indexes, unit=" images ")
        for k, index in enumerate(bar):
            bar.desc = f" {k + 1} / {len(indexes)}"

            filename = index.name
            origin_labels_dir = labels_dir.joinpath(filename).with_suffix(".txt")
            # iterdir() already yields paths that include images_dir; joining
            # it onto images_dir again would duplicate a relative prefix.
            origin_images_dir = index

            img = cv2.imread(origin_images_dir.as_posix())
            if img is None:
                # imread signals failure by returning None rather than raising.
                print(f"{origin_images_dir} is not a readable image, skipped")
                continue
            image_height, image_width = img.shape[:2]

            dataset["images"].append({"file_name": filename,
                                      "id": k,
                                      "width": image_width,
                                      "height": image_height})
            # Move the image into the COCO folder.
            move_images(origin_images_dir, filename, path)

            if not origin_labels_dir.exists():
                print(f"{origin_images_dir} has not label")
                continue
            with origin_labels_dir.open("r") as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # blank line
                    if len(fields) < 5:
                        print(f"{origin_labels_dir}: malformed label line skipped: {line.strip()!r}")
                        continue
                    dataset["annotations"].append(_yolo_label_to_coco(
                        fields, image_width, image_height, id_count, k))
                    id_count += 1

        annotations = root_dir.joinpath("yolo2coco", "annotations")
        # parents=True: "yolo2coco" may not exist yet if no image was moved.
        annotations.mkdir(parents=True, exist_ok=True)
        save_file = annotations.joinpath(f"instances_{images_dir.name}2017.json")
        with save_file.open("w") as f:
            json.dump(dataset, f)
        print("Save annotation to {} successfully!".format(save_file))
        print("Move images of {}_dataset successfully!".format(images_dir.name))


if __name__ == "__main__":
    # Root directory of the YOLO-format dataset -- change to your own path.
    # Raw string so the Windows backslashes are not parsed as escape sequences.
    root_dir = Path(r"G:\Desttop\dataset\spilt")
    # Split names looked up under images/ and labels/.
    type_list = ["train", "val", "test"]
    dir_list, classes = get_datasets_path(root_dir)
    yolo2coco(dir_list, classes)

Labelme是一个用于图像标注的工具,生成的文件格式为JSON。而COCO(Common Objects in Context)是一种广泛使用的图像数据集格式,支持对象检测、分割等任务。Labelme生成的JSON文件需要转换为COCO格式,以便在COCO兼容的框架和工具中使用。以下是将Labelme JSON文件转换为COCO格式的步骤:

1. **安装必要的库**: 需要安装`labelme`和`pycocotools`库。可以使用以下命令进行安装:

```bash
pip install labelme pycocotools
```

2. **编写转换脚本**: 编写一个Python脚本,将Labelme的JSON文件转换为COCO格式。以下是一个简单的示例脚本:

```python
import json
import os
from labelme import utils
from pycocotools import mask as coco_mask

def convert_labelme_to_coco(labelme_json, coco_json):
    # 读取Labelme JSON文件
    with open(labelme_json, 'r') as f:
        labelme_data = json.load(f)

    images = []
    annotations = []
    categories = []
    label_to_id = {}
    annotation_id = 1
    image_id = 1

    for shape in labelme_data['shapes']:
        label = shape['label']
        if label not in label_to_id:
            label_to_id[label] = len(label_to_id) + 1
            categories.append({
                'id': label_to_id[label],
                'name': label,
                'supercategory': 'none'
            })

        points = shape['points']
        mask = utils.polygons_to_mask([labelme_data['imageHeight'], labelme_data['imageWidth']], [points])
        binary_mask = mask.astype('uint8')
        encoded_mask = coco_mask.encode({'size': binary_mask.shape, 'counts': binary_mask.tobytes()})
        area = int(coco_mask.area(encoded_mask))
        bbox = coco_mask.toBbox(encoded_mask).tolist()

        annotations.append({
            'id': annotation_id,
            'image_id': image_id,
            'category_id': label_to_id[label],
            'segmentation': encoded_mask['counts'].decode('utf-8'),
            'area': area,
            'bbox': bbox,
            'iscrowd': 0
        })
        annotation_id += 1

    images.append({
        'id': image_id,
        'file_name': labelme_data['imagePath'],
        'width': labelme_data['imageWidth'],
        'height': labelme_data['imageHeight']
    })

    coco_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    with open(coco_json, 'w') as f:
        json.dump(coco_data, f, indent=4)

if __name__ == "__main__":
    labelme_json = 'path/to/labelme.json'
    coco_json = 'path/to/coco.json'
    convert_labelme_to_coco(labelme_json, coco_json)
```

3. **运行脚本**: 将脚本保存为`convert_labelme_to_coco.py`,并运行:

```bash
python convert_labelme_to_coco.py
```

通过上述步骤,你可以将Labelme生成的JSON文件转换为COCO格式,以便在各种计算机视觉任务中使用。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值