COCO数据集图片标注小工具

最新推荐文章于 2024-10-24 15:49:28 发布
摩羯mc_six
最新推荐文章于 2024-10-24 15:49:28 发布
阅读量563
点赞数 1
文章标签： python json 目标检测
本文链接：https://blog.youkuaiyun.com/u010297049/article/details/139045597
版权
最近深度学习，用到COCO数据集，需要对图片进行一些标注例如：添加矩形框并标注类型、添加字幕、添加人体关键点、添加关键物体的掩膜（mask）、添加关键物体的外轮廓等。经过在网上的搜索整理，做出了自认为比较实用的的小工具，特进行记录并共享给网友。
import os, time
from pycocotools.coco import COCO
from random import random
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import cv2

def pic_caption(coco, pic_id, i_path, s_path, position,color_n, t_size, delay_t):
    print("make captions.......................................................")
    ann_ids = coco.getAnnIds(imgIds=pic_id)
    # 根据annotations idx信息获取所有标注信息
    targets = coco.loadAnns(ann_ids)
    # get image file name
    path = coco.loadImgs(pic_id)[0]['file_name']
    # read image
    img = Image.open(os.path.join(i_path, path)).convert('RGB')
    width, height = img.size
    draw = ImageDraw.Draw(img)
    x = 0
    n = 0
    for ann in targets:
        font = ImageFont.truetype('arial.ttf', size=t_size)
        if "up" in position:
            draw.text((x, 0 + n * (t_size + 3)), ann['caption'], fill=color_n, font=font)  # 定义文字内容，fill 为字体颜色(r,g,b)
        elif "down" in position:
            draw.text((x, height - (n+1) * (t_size + 3)), ann['caption'], fill=color_n, font=font)  # 定义文字内容，fill 为字体颜色(r,g,b)
        n += 1
    plt.figure(path)
    plt.imshow(img)
    plt.show(block=False)
    plt.ion()
    plt.pause(delay_t)
    plt.close()
    img.save(s_path + path[:-4]  + '_caption'+ '.jpg')  # 另存加上了框框的图片


def pic_rect(coco, pic_id, i_path, s_path, color_n, t_size, delay_t):
    print("make rectangle.......................................................")
    coco_classes = dict([(v["id"], v["name"]) for k, v in coco.cats.items()])
    # 获取对应图像id的所有annotations idx信息
    ann_ids = coco.getAnnIds(imgIds=pic_id)
    # 根据annotations idx信息获取所有标注信息
    targets = coco.loadAnns(ann_ids)
    # get image file name
    path = coco.loadImgs(pic_id)[0]['file_name']
    # read image
    img = Image.open(os.path.join(i_path, path)).convert('RGB')
    draw = ImageDraw.Draw(img)
    # draw box to image
    color_list = ['red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon',
                  'red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon',
                  'red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon',
                  'red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon',
                  'red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon',
                  'red', 'blue', 'green', 'yellow', 'orange', 'pink', 'brown', 'navy', 'gold', 'tomato', 'maroon', ]
    n = 0
    for target in targets:
        x, y, w, h = target["bbox"]
        x1, y1, x2, y2 = x, y, int(x + w), int(y + h)
        draw.rectangle((x1, y1, x2, y2), outline=color_list[n],
                       width=3)  # outline  是外框颜色，可自己定义，这里是循环调用了一个数组,whdth为框线的粗细。
        font = ImageFont.truetype('arial.ttf', size=t_size)  # 定义字体，文字大小
        draw.text((x1, y1), coco_classes[target["category_id"]], fill=color_n, font=font)  # 定义文字内容，fill 为字体颜色(r,g,b)
        n += 1

    plt.figure(path)
    plt.imshow(img)
    plt.show(block=False)
    plt.ion()
    plt.pause(delay_t)
    plt.close()
    img.save(s_path  + path[:-4] + '_rect'+ '.jpg')  # 另存加上了框框的图片


def pic_mask(coco, pic_id, i_path, s_path, str=' ', delay_t=1):
    print("make mask.......................................................")
    # 需要画图的是第num副图片, 对应的json路径和图片路径,
    # str = ' '为类别字符串，输入必须为字符串形式 'str'，若为空，则返回所有类别id
    catIds = coco.getCatIds(catNms=['str'])  # 获取指定类别 id
    path = coco.loadImgs(pic_id)[0]['file_name']  # 加载图片,loadImgs() 返回的是只有一个内嵌字典元素的list, 使用[0]来访问这个元素
    image = cv2.imread(os.path.join(i_path, path))
    annIds = coco.getAnnIds(imgIds=pic_id, catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    zeros = np.zeros((image.shape), dtype=np.uint8)
    alpha = 1
    beta = 0.6
    gamma = 0
    for ann in anns:
        segs = ann['segmentation']
        if 'list' in type(segs).__name__:
            for seg in segs:
                poly = []
                c_out = (250, 0, 0)
                c_in = (int(random() * 255), int(random() * 255), int(random() * 255))
                for i in range(int(len(seg) / 2)):
                    poly.append([seg[i * 2], seg[i * 2 + 1]])
                poly_ = np.reshape(np.array(poly), [-1, 1, 2]).astype(np.int32)
                image = cv2.polylines(np.array(image), [poly_], True, c_out, 2, )
                mask = cv2.fillPoly(zeros, [poly_], color=c_in, )
                mask_img = cv2.addWeighted(image, alpha, mask, beta, gamma)
        else:
            pass
    cv2.imshow(path, mask_img)
    cv2.waitKey(delay_t * 1000)
    cv2.destroyAllWindows()
    cv2.imwrite(s_path + path[:-4] + '_mask' + '.png', mask_img)


def pic_person_key_point(coco, img_id, i_path, s_path, delay_t):
    print("make person_keypoint.......................................................")
    catIds = coco.getCatIds(catNms=['person'])
    aColor = [(0, 255, 0, 0), (255, 0, 0, 0), (0, 0, 255, 0), (0, 255, 255, 0)]
    path = coco.loadImgs(img_id)[0]['file_name']
    matImg = Image.open('%s/%s' % (i_path, path)).convert('RGB')
    draw = ImageDraw.Draw(matImg)
    # # 通过输入图片的id、类别的id、实例的面积、是否是人群来得到图片的注释id
    annIds = coco.getAnnIds(imgIds=img_id, catIds=catIds, iscrowd=None)
    # # 通过注释的id，得到注释的信息
    anns = coco.loadAnns(annIds)
    if len(anns) != 0:
        for ann in anns:
            sks = np.array(coco.loadCats(ann['category_id'])[0]['skeleton']) - 1
            kp = np.array(ann['keypoints'])
            x = kp[0::3]
            y = kp[1::3]
            v = kp[2::3]
            for sk in sks:
                c = aColor[np.random.randint(0, 4)]
                if np.all(v[sk] > 0):
                    # 画点之间的连接线
                    draw.line([(x[sk][0], y[sk][0]), (x[sk][1], y[sk][1])], c, 3)
            for i in range(x.shape[0]):
                draw.ellipse((x[i] - 3, y[i] - 3, x[i] + 3, y[i] + 3), fill=(0, 255, 0), outline=(255, 0, 0))
        plt.figure(path)
        plt.imshow(matImg)
        plt.show(block=False)
        plt.ion()
        plt.pause(delay_t)
        plt.close()
        matImg.save(s_path + path[:-4]  + '_pkp'+ '.jpg')  # 另存加上了框框的图片


def pic_polygon(coco, pic_id, i_path, s_path, str=' ', delay_t=1):
    print("make polygon.......................................................")
    # str = ' '为类别字符串，输入必须为字符串形式 'str'，若为空，则返回所有类别id
    catIds = coco.getCatIds(catNms=['str'])  # 获取指定类别 id
    path = coco.loadImgs(pic_id)[0]['file_name']  # 加载图片,loadImgs() 返回的是只有一个内嵌字典元素的list, 使用[0]来访问这个元素
    image = cv2.imread(os.path.join(i_path, path))
    annIds = coco.getAnnIds(imgIds=pic_id, catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    for ann in anns:
        segs = ann['segmentation']
        if 'list' in type(segs).__name__:
            for seg in segs:
                poly = []
                c_out = (int(random() * 100), int(random() * 100), int(random() * 100))
                for i in range(int(len(seg) / 2)):
                    poly.append([seg[i * 2], seg[i * 2 + 1]])
                poly_ = np.reshape(np.array(poly), [-1, 1, 2]).astype(np.int32)
                image = cv2.polylines(np.array(image), [poly_], True, c_out, 3, )

        else:
            pass
    cv2.imshow(path, image)
    cv2.waitKey(delay_t * 1000)
    cv2.destroyAllWindows()
    cv2.imwrite(s_path + path[:-4] + '_poly' + '.jpg', image)


if __name__ == "__main__":
    year = 2014
    pic_path = 'val'

    cap_file = 'C:/coco_data/annotations/captions_' + pic_path + str(year) + '.json'
    json_file = 'C:/coco_data/annotations/instances_' + pic_path + str(year) + '.json'
    pkp_file = 'C:/coco_data/annotations/person_keypoints_' + pic_path + str(year) + '.json'
    img_path = 'C:/coco_data/' + pic_path + str(year)
    save_path = 'C:/coco_data/result_test/'

    ids = [559665,391895,60623,483108,384213,386164,223648,403385,294832,462565,436141,192440,1146,394240, ]
    delay = 1

    coco_j = COCO(json_file)
    coco_c = COCO(cap_file)
    coco_p = COCO(pkp_file)

    for id in ids:
        print(id, '    ', coco_j.loadImgs(id)[0]['file_name'])
        pic_person_key_point(coco_p, id, img_path, save_path, delay_t=delay)
        pic_caption(coco_c, id, img_path, save_path, position="down",color_n=(0, 0, 255), t_size=18, delay_t=delay)
        pic_mask(coco_j, id, img_path, save_path, str='', delay_t=delay)
        pic_rect(coco_j, id, img_path, save_path, color_n=(0, 0, 255), t_size=18, delay_t=delay)
        pic_polygon(coco_j, id, img_path, save_path, str='', delay_t=delay)
运行结果图片如下：