目标检测：labelme标注的数据集转换为coco格式

贾贾贾贾贾

已于 2024-11-18 21:58:25 修改

阅读量1.2k

点赞数 12

文章标签：目标检测

于 2024-11-15 00:02:31 首次发布

本文链接：https://blog.youkuaiyun.com/weixin_57216243/article/details/143783513

版权

1. 先将 labelme 标注生成的 json 文件转换为 yolo 格式

import json
import numpy as np
import os
import cv2

def json2yolo(path, cls_dict, types="bbox"):
    """Convert one labelme JSON annotation file into a YOLO-format label file.

    Every entry of the JSON ``shapes`` list becomes one output line
    ``<class_id> <values...>`` whose coordinates are divided by the
    ``imageWidth`` / ``imageHeight`` stored in the same JSON. The text is
    printed and written next to the input file, with the file extension
    replaced by ``.txt``.

    Args:
        path: Path of the labelme ``.json`` file.
        cls_dict: Mapping from lower-cased label name to class id.
        types: ``"bbox"`` writes ``cx cy w h`` of the rectangle returned by
            ``cv2.boundingRect``; any other value writes the normalised shape
            points flattened as ``x1 y1 x2 y2 ...``.

    Raises:
        KeyError: If a required JSON key is missing or a shape's lower-cased
            label is not a key of ``cls_dict``.
    """
    with open(path, 'r', encoding='utf-8') as fp:
        data = json.load(fp)

    h = data["imageHeight"]
    w = data["imageWidth"]
    lines = []
    for shape in data["shapes"]:
        # Force a float dtype: with integer coordinates the in-place
        # normalisation below would otherwise be truncated to 0.
        points = np.array(shape["points"], dtype=np.float64)
        if types == "bbox":
            x, y, wi, hi = cv2.boundingRect(points.reshape((-1, 1, 2)).astype(np.float32))
            cx, cy = x + wi / 2, y + hi / 2
            cx, cy, wi, hi = cx / w, cy / h, wi / w, hi / h
            msg = "%.4f %.4f %.4f %.4f" % (cx, cy, wi, hi)
        else:
            points[:, 0] /= w
            points[:, 1] /= h
            # Flatten the (N, 2) array into the "x1 y1 x2 y2 ..." YOLO string.
            msg = " ".join('%.4f' % value for value in points.reshape(-1))
        label = shape['label'].lower()
        lines.append(str(cls_dict[label]) + " " + msg + "\n")

    all_lines = "".join(lines)
    print(all_lines)
    # Swap only the extension; str.replace('json', 'txt') would also rewrite
    # any "json" occurring in a directory name or in the file stem.
    filename = os.path.splitext(path)[0] + '.txt'
    with open(filename, 'w', encoding='utf-8') as fh:
        fh.write(all_lines)

# Convert every labelme ``.json`` file found directly inside ``path``.
# Raw string: the backslashes of the Windows path are not valid escape sequences.
path = r"G:\Desttop\dataset\labelme/"  # change to your own labelme directory
path_list = os.listdir(path)
cls_dict = {'hand': 0, 'pen': 1}  # change to your own label -> class id mapping
# Match on the extension so names that merely contain ".json" are ignored.
path_list2 = [x for x in path_list if x.endswith(".json")]
for p in path_list2:
    json2yolo(os.path.join(path, p), cls_dict)

2. 划分数据集（为便于理解，示例代码保留了我本机的路径，使用时请把所有路径替换为自己的路径）

import os, shutil, random
random.seed(0)
import numpy as np
from sklearn.model_selection import train_test_split

# Split the YOLO label files and their images into train/val/test subsets
# by copying them into <split_root>/images/<subset> and <split_root>/labels/<subset>.
val_size = 0.2
test_size = 0.0
postfix = 'jpg'  # image file extension, without the dot
# Raw strings: the backslashes of these Windows paths are not valid escapes.
imgpath = r'G:\Desttop\dataset\images'
txtpath = r'G:\Desttop\dataset/txt'
# The directory name 'spilt' is kept as is; the COCO conversion step below
# reads from the same path.
split_root = r'G:\Desttop\dataset\spilt'

for kind in ('images', 'labels'):
    for subset in ('train', 'val', 'test'):
        os.makedirs('{}/{}/{}'.format(split_root, kind, subset), exist_ok=True)

# Keep real label files only (the stem is later taken with i[:-4]) and sort
# them: os.listdir returns names in arbitrary order, so without sorting the
# seeded shuffle would not produce a reproducible split.
listdir = sorted(i for i in os.listdir(txtpath) if i.endswith('.txt'))
random.shuffle(listdir)
train_end = int(len(listdir) * (1 - val_size - test_size))
val_end = int(len(listdir) * (1 - test_size))
train, val, test = listdir[:train_end], listdir[train_end:val_end], listdir[val_end:]
print(f'train set size:{len(train)} val set size:{len(val)} test set size:{len(test)}')

for subset, names in (('train', train), ('val', val), ('test', test)):
    for i in names:
        stem = i[:-4]  # label file name without the ".txt" suffix
        shutil.copy('{}/{}.{}'.format(imgpath, stem, postfix),
                    '{}/images/{}/{}.{}'.format(split_root, subset, stem, postfix))
        shutil.copy('{}/{}'.format(txtpath, i),
                    '{}/labels/{}/{}'.format(split_root, subset, i))

3.转为coco格式

我的目录是这样的：

images和labels下一级目录是这样的：

classes.txt 的内容如下：建议删除文件末尾的空行，让光标停在最后一行的末尾。

import json
import shutil
from pathlib import Path
import cv2
from tqdm import tqdm


def get_datasets_path(root_dir):
    """Collect the existing (images, labels) directory pairs and the class names.

    For every split name in the module-level ``type_list`` the pair
    ``root_dir/images/<split>``, ``root_dir/labels/<split>`` is kept when the
    images directory exists. Class names are the whitespace-separated tokens
    of ``root_dir/classes.txt``.

    Args:
        root_dir: ``pathlib.Path`` of the YOLO-format dataset root.

    Returns:
        A tuple ``(dir_list, classes)``: a list of ``(images_dir, labels_dir)``
        path tuples and a list of class-name strings.

    Raises:
        AssertionError: If an images directory exists but its labels
            directory does not (only while assertions are enabled).
        FileNotFoundError: If no images directory exists for any split.
    """
    found = []
    for split in type_list:
        images_dir = root_dir.joinpath("images", split)
        labels_dir = root_dir.joinpath("labels", split)
        if not images_dir.exists():
            continue
        found.append((images_dir, labels_dir))
        assert labels_dir.exists(), f"the path '{labels_dir}' is not exists"

    if not found:
        raise FileNotFoundError("the path is empty,please check you path")

    with root_dir.joinpath("classes.txt").open() as f:
        content = f.read()
    classes = content.strip().split()

    return found, classes


def move_images(origin_images_dir, filename, path):
    """Move one image into ``<root_dir>/yolo2coco/<split>2017/``.

    The split name is the name of ``path[0]``; ``root_dir`` is read from
    module scope. The target directory is created on first use.

    Args:
        origin_images_dir: Current location of the image file.
        filename: File name to give the image inside the target directory.
        path: Sequence whose first element is the split's images directory.
    """
    split_name = path[0].name
    target_dir = root_dir / "yolo2coco" / f"{split_name}2017"
    if not target_dir.exists():
        target_dir.mkdir(parents=True)
    # Full path of the destination file, then move the image there.
    shutil.move(origin_images_dir, target_dir / filename)


# Adapted from https://github.com/Weifeng-Chen/dl_scripts/blob/main/detection/yolo2coco.py
def yolo2coco(dir_list, classes):
    """Convert YOLO-format splits into COCO ``instances_<split>2017.json`` files.

    For every ``(images_dir, labels_dir)`` pair, each file in ``images_dir``
    is read with OpenCV to obtain its size, registered as a COCO image and
    moved into ``<root_dir>/yolo2coco/<split>2017`` via ``move_images``. Each
    non-blank line ``cls cx cy w h`` of the matching ``.txt`` label file
    becomes one COCO annotation with an absolute-pixel ``[x, y, w, h]`` box.
    The annotation file is written to ``<root_dir>/yolo2coco/annotations``.
    ``root_dir`` is read from module scope.

    Args:
        dir_list: Iterable of ``(images_dir, labels_dir)`` ``pathlib.Path``
            pairs, as returned by ``get_datasets_path``.
        classes: Class names; the list index is used as the category id.
    """
    for path in dir_list:
        images_dir, labels_dir = path[0], path[1]
        dataset = {"categories": [], "annotations": [], "images": []}
        for i, cls in enumerate(classes, 0):
            dataset["categories"].append(
                {"id": i, "name": cls, "supercategory": "mark"})

        indexes = list(images_dir.iterdir())
        id_count = 0
        bar = tqdm(indexes, unit=" images ")
        for k, index in enumerate(bar):
            bar.desc = f" {k + 1} / {len(indexes)}"

            filename = index.name
            origin_labels_dir = labels_dir.joinpath(
                Path(filename).with_suffix(".txt"))
            # iterdir() already yields paths that include images_dir; joining
            # them onto images_dir again duplicates the prefix when the
            # dataset root is a relative path.
            origin_images_dir = index

            img = cv2.imread(origin_images_dir.as_posix())
            if img is None:
                # cv2.imread returns None instead of raising when the file
                # cannot be decoded; skip it rather than crash on .shape.
                print(f"{origin_images_dir} could not be read as an image, skipped")
                continue
            image_height, image_width = img.shape[:2]

            dataset["images"].append({"file_name": filename,
                                      "id": k,
                                      "width": image_width,
                                      "height": image_height})
            # Move the image into the COCO folder.
            move_images(origin_images_dir, filename, path)

            if not origin_labels_dir.exists():
                print(f"{origin_images_dir} has not label")
                continue
            with origin_labels_dir.open("r") as f:
                for line in f:
                    label = line.strip().split()
                    if not label:
                        # A blank (e.g. trailing) line carries no annotation.
                        continue
                    x = float(label[1])
                    y = float(label[2])
                    w = float(label[3])
                    h = float(label[4])

                    # Normalised centre/size -> absolute corner coordinates.
                    x1 = (x - w / 2) * image_width
                    y1 = (y - h / 2) * image_height
                    x2 = (x + w / 2) * image_width
                    y2 = (y + h / 2) * image_height

                    cls_id = int(label[0])
                    width = max(0, x2 - x1)
                    height = max(0, y2 - y1)
                    dataset["annotations"].append({
                        "area": width * height,
                        # COCO boxes are top-left corner plus width/height.
                        "bbox": [x1, y1, width, height],
                        "category_id": cls_id,
                        "id": id_count,
                        "image_id": k,
                        "iscrowd": 0,
                        # The polygon is simply the four corners of the box.
                        "segmentation": [[x1, y1, x2, y1, x2, y2, x1, y2]]})
                    id_count += 1

        annotations = root_dir.joinpath("yolo2coco", "annotations")
        # parents=True: "yolo2coco" does not exist yet if no image was moved.
        annotations.mkdir(parents=True, exist_ok=True)
        save_file = annotations.joinpath(f"instances_{images_dir.name}2017.json")
        with save_file.open("w") as f:
            json.dump(dataset, f)
        print("Save annotation to {} successfully!".format(save_file))
        print("Move images of {}_dataset successfully!".format(images_dir.name))


if __name__ == "__main__":
    # Root directory of the YOLO-format dataset. Raw string: the backslashes
    # of the Windows path are not valid escape sequences.
    # root_dir and type_list are module-level names that the functions above
    # read directly, so they must be assigned before those functions are called.
    root_dir = Path(r"G:\Desttop\dataset\spilt")
    type_list = ["train", "val", "test"]
    dir_list, classes = get_datasets_path(root_dir)
    yolo2coco(dir_list, classes)