Building Your Own Dataset Series: From RGB -> Binary Mask -> COCO Format

This post explains how to split an RGB instance mask into binary masks and then turn those masks into a COCO-format dataset. First, the `rgb2mask.py` script splits each RGB mask into one binary mask per instance; then `mask2coco.py` converts those masks into a COCO-format JSON file and copies and renames the image files so the result can be used for model training. The whole pipeline covers image processing, file renaming, and dataset layout, and is aimed at instance segmentation tasks.


Preface

As covered in a previous post, a mask can be converted to labelme format and then to COCO. For instance-level masks, however, things are a bit different: the RGB mask first has to be split into per-instance binary masks, which are then converted directly, skipping the labelme step.

[Screenshots in the original post show the contents of the image folder, the contents of the annotations folder, and the processed data as loaded by the model.]
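Based on the paths used in mask2coco.py below, the processed dataset ends up laid out roughly like this (the folder name modify_test and the sample SubClass "cat" are only illustrations of the naming scheme):

modify_test/
├── image/                      # renamed originals, e.g. 123.jpg  (ImageNumber.jpg)
├── annotations/                # binary masks, e.g. 123_cat_0.png (ImageNumber_SubClass_idx.png)
└── instances_test2017.json     # the generated COCO-style annotation file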

RGB to binary mask

rgb2mask.py

import cv2
import numpy as np
import os, glob

part = "test"

def rgb2masks(label_name):
    # File naming convention of the source masks (COD10K):
    # Camouflaged:
    # COD10K-CAM-SuperNumber-SuperClass-SubNumber-SubClass-ImageNumber
    # Non-Camouflaged:
    # COD10K-NonCAM-SuperNumber-SuperClass-SubNumber-SubClass-ImageNumber
    name = os.path.split(label_name)[-1].split('.')[0]
    lbl_id = name.split('-')[-1]      # ImageNumber
    subClass = name.split('-')[-2]    # SubClass
    lbl = cv2.imread(label_name, 1)   # RGB instance mask (OpenCV reads it as BGR)
    try:
        h, w = lbl.shape[:2]

        leaf_dict = {}                # maps each instance colour to its index
        idx = 0
        white_mask = np.ones((h, w, 3), dtype=np.uint8) * 255
        for i in range(h):
            for j in range(w):
                # skip colours already handled and the black background
                if tuple(lbl[i][j]) in leaf_dict or tuple(lbl[i][j]) == (0, 0, 0):
                    continue
                leaf_dict[tuple(lbl[i][j])] = idx
                mask = (lbl == lbl[i][j]).all(-1)   # all pixels of this instance colour
                # leaf = lbl * mask[..., None]            # colorful leaf with black background
                # np.repeat(mask[..., None], 3, axis=2)   # 3D mask
                leaf = np.where(mask[..., None], white_mask, 0)
                # output name: ImageNumber_SubClass_idx.png (the ./<part>/annotations/ folder must already exist)
                mask_name = './' + part + '/annotations/' + lbl_id + '_' + subClass + '_' + str(idx) + '.png'
                cv2.imwrite(mask_name, leaf)
                idx += 1
        print("OK: " + label_name)
    except Exception:
        print("cannot read: " + label_name)
        with open(part + "_error.txt", 'a+') as f:
            f.write(label_name)
            f.write('\n')

label_dir = './instance_' + part
label_list = glob.glob(os.path.join(label_dir, '*.png'))
for label_name in label_list:
    rgb2masks(label_name)
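The per-pixel double loop above can be slow on large masks. Below is a minimal vectorized sketch of the same colour-splitting step, using np.unique to list every instance colour at once; it is not the original script, and the simplified output naming (0.png, 1.png, ...) and the out_dir argument are placeholders.

import cv2
import numpy as np

def rgb2masks_fast(label_name, out_dir):
    lbl = cv2.imread(label_name, 1)                 # BGR instance mask
    if lbl is None:
        return
    colors = np.unique(lbl.reshape(-1, 3), axis=0)  # every distinct colour in the mask
    idx = 0
    for color in colors:
        if tuple(color) == (0, 0, 0):               # skip the black background
            continue
        mask = (lbl == color).all(-1)               # boolean (h, w) mask for this colour
        leaf = mask.astype(np.uint8) * 255          # white instance on black background
        cv2.imwrite(out_dir + '/' + str(idx) + '.png', leaf)
        idx += 1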

Binary mask to COCO format

This script does two things: 1. copy the original images into the dataset folder and rename them; 2. generate the COCO JSON file.

mask2coco.py

import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
from glob import glob
import cv2
import shutil
 
part = 'test'
IMAGE_SRC = 'C:/Users/awei/Desktop/rgb2mask/Image_'+part+'/'
ROOT_DIR = 'C:/Users/awei/Desktop/rgb2mask/modify_'+part
IMAGE_DIR = os.path.join(ROOT_DIR, "image")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "annotations")
 
INFO = {
    "description": "Leaf Dataset",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2017,
    "contributor": "Francis_Liu",
    "date_created": datetime.datetime.utcnow().isoformat(' ')
}
 
LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
]

# add categories as needed; getCategories() below fills this list in automatically
CATEGORIES = [
    # {
    #     'id': 1,  # must be the integer 1, not the string '1'
    #     'name': 'leaf',
    #     'supercategory': 'leaf',
    # }
]

# Camouflaged:
# COD10K-CAM-SuperNumber-SuperClass-SubNumber-SubClass-ImageNumber

# Non-Camouflaged:
# COD10K-NonCAM-SuperNumber-SuperClass-SubNumber-SubClass-ImageNumber
# Super_Class_Dictionary = {'1':'Aquatic', '2':'Terrestrial', '3':'Flying', '4':'Amphibian', '5':'Other'}
# Sub_Class_Dictionary = {'1':'batFish','2':'clownFish','3':'crab','4':'crocodile','5':'crocodileFish','6':'fish','7':'flounder',
#              '8':'frogFish','9':'ghostPipefish','10':'leafySeaDragon','11':'octopus','12':'pagurian','13':'pipefish',
#               '14':'scorpionFish','15':'seaHorse','16':'shrimp','17':'slug','18':'starFish','19':'stingaree',
#               '20':'turtle','21':'ant','22':'bug','23':'cat','24':'caterpillar','25':'centipede','26':'chameleon',
#               '27':'cheetah','28':'deer','29':'dog','30':'duck','31':'gecko','32':'giraffe','33':'grouse','34':'human',
#               '35':'kangaroo','36':'leopard','37':'lion','38':'lizard','39':'monkey','40':'rabbit','41':'reccoon',
#               '42':'sciuridae','43':'sheep','44':'snake','45':'spider','46':'stickInsect','47':'tiger','48':'wolf',
#               '49':'worm','50':'bat','51':'bee','52':'beetle','53':'bird','54':'bittern','55':'butterfly','56':'cicada',
#               '57':'dragonfly','58':'frogmouth','59':'grasshopper','60':'heron','61':'katydid','62':'mantis',
#               '63':'mockingbird','64':'moth','65':'owl','66':'owlfly','67':'frog','68':'toad','69':'other'}

def getCategories():
    image_files = glob(IMAGE_SRC + "*.jpg")
    subClassList = []
    temp = []
    for image in image_files:
        image_name = os.path.basename(image).split('.')[0]
        try:
            _, cam_type, superNumber, superClass, subNumber, subClass, imageNumber = image_name.split('-')
        except ValueError:
            print("skipped (unexpected file name): " + image_name)
            continue

        # keep only the camouflaged (CAM) images
        if cam_type != "CAM":
            continue

        # copy the image into IMAGE_DIR, renamed to <ImageNumber>.jpg
        if not os.path.exists(IMAGE_DIR + "/" + str(imageNumber) + ".jpg"):
            shutil.copy(image, IMAGE_DIR + "/" + str(imageNumber) + ".jpg")
        if subClass not in subClassList:
            subClassList.append(subClass)
            item = {'id': int(subNumber),  # cast to int -- very important!
                    'name': subClass,
                    'supercategory': superClass
            }
            temp.append(item)
    global CATEGORIES
    CATEGORIES = sorted(temp, key=lambda x: x["id"])

    
 
def filter_for_jpeg(root, files):
    file_types = ['*.jpeg', '*.jpg', '*.png']
    file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if re.match(file_types, f)]
    return files
 
 
def filter_for_annotations(root, files, image_filename):
    file_types = ['*.png']
    file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
    basename_no_extension = os.path.splitext(os.path.basename(image_filename))[0]
    file_name_prefix = basename_no_extension + '_.*'   # regex prefix used to match this image's binary masks
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if re.match(file_types, f)]
    files = [f for f in files if re.match(file_name_prefix, os.path.splitext(os.path.basename(f))[0])]
    return files
 
 
def main():
    getCategories()
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }


 
    image_id = 1
    segmentation_id = 1
 
    # filter for jpeg images
    for root, _, files in os.walk(IMAGE_DIR):
        image_files = filter_for_jpeg(root, files)
 
        # go through each image
        for image_filename in image_files:
            image = Image.open(image_filename)
            image_info = pycococreatortools.create_image_info(
                    image_id, os.path.basename(image_filename), image.size)
            coco_output["images"].append(image_info)
 
            # filter for associated png annotations
            for root, _, files in os.walk(ANNOTATION_DIR):
                annotation_files = filter_for_annotations(root, files, image_filename)
 
                # go through each associated annotation
                for annotation_filename in annotation_files:
 
                    
                    # class_id = [x['id'] for x in CATEGORIES if x['name'] in annotation_filename][0]
                    class_id = [x['id'] for x in CATEGORIES if x['name'].upper() == annotation_filename.split('_')[-2].upper()][0]  # exact match on the class name

                    print(annotation_filename+" "+str(class_id))
 
                    category_info = {'id': class_id, 'is_crowd': 'crowd' in image_filename}
                    binary_mask = np.asarray(Image.open(annotation_filename)
                                             .convert('1')).astype(np.uint8)
 
                    annotation_info = pycococreatortools.create_annotation_info(
                            segmentation_id, image_id, category_info, binary_mask,
                            image.size, tolerance=2)
 
                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)
 
                    segmentation_id = segmentation_id + 1
 
            image_id = image_id + 1
 
    with open(ROOT_DIR+'/instances_'+part+'2017.json', 'w') as output_json_file:
        json.dump(coco_output, output_json_file)
 
 
if __name__ == "__main__":
    main()
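Once the script finishes, a quick sanity check (a minimal sketch, not part of the original post) is to load the generated JSON back with pycocotools and print some counts; adjust the path to your own ROOT_DIR and part:

from pycocotools.coco import COCO

coco = COCO('C:/Users/awei/Desktop/rgb2mask/modify_test/instances_test2017.json')

print('categories :', len(coco.getCatIds()))
print('images     :', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))

# list a few categories to confirm getCategories() picked them up correctly
for cat in coco.loadCats(coco.getCatIds())[:5]:
    print(cat['id'], cat['name'], cat['supercategory'])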

🔰 Summary 🔰

1. From labelImg format -> txt format (YOLO format, ICDAR2015 format)

2. From binary mask -> labelme format -> COCO format

3. From labelme format -> VOC format + from binary mask -> VOC format

🔷 4. From RGB -> binary mask -> COCO format

5. Instance segmentation mask -> semantic segmentation mask -> augmented mask

6. COCO format -> YOLO format

Aligning the file names of dual-modality images with their corresponding annotation files

Fixing nodes, attributes, and text in XML annotation files

Statistical analysis of COCO JSON datasets
