在尝试VOC的实例分割时,我在网上查了很多资料,但都没有明确说明实例分割的标签是如何排序的,以及它们是如何与检测标签对应的。
实际上,标签是按照颜色的BGR三个通道、以类似二进制计数的方式升序排列的。具体结论可以直接跳到本文的最后。
在还不知道标签排序规则的时候,我首先尝试在每个检测框中找出除背景和边缘之外像素数量最多的颜色,并把它作为该检测标签对应的物体。代码主要使用了Python和OpenCV,脚本文件与VOCdevkit目录位于同一级,代码如下:
# -*- coding: UTF-8 -*-
import os
#osimport os.path
import xml.etree.ElementTree as xmlET
from xml.etree.ElementTree import Element
import cv2
import numpy as np
#from PIL import Image, ImageDraw
# Category names used by the annotations. A name's position in this tuple is
# the integer label stored for it, so the order must stay fixed.
classes = (
    '__background__',  # always index 0
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)
# Inputs read by the script (paths are relative to the working directory).
file_path_xml = './VOCdevkit/VOC2012/Annotations'  # per-image XML annotations
file_path_img = './VOCdevkit/VOC2012/JPEGImages'  # source .jpg images
file_path_mask = './VOCdevkit/VOC2012/SegmentationObject'  # .png masks
file_path_train = './VOCdevkit/VOC2012/ImageSets/Segmentation'  # location of the segmentation split lists

# Output locations (not referenced in the visible part of the script).
save_xml_path = './VOCdevkit/VOC2012/Annotations_output'
save_file_path = './VOCdevkit/VOC2012/img_output'
# File names are taken from the split list file, read one line at a time.
# The annotation directory listing is kept as well; its length is what the
# main loop uses as its iteration count.
pathDir = os.listdir(file_path_xml)
trainval_list = os.path.join(file_path_train, 'trainval.txt')
fp = open(trainval_list)
for idx in range(len(pathDir)): # 跑完整数据集用
# for idx in range(1, 10):
print(idx)
# Next image id from the split list. readline() keeps the trailing newline,
# but the final line of a file may not have one, so strip whitespace rather
# than unconditionally dropping the last character (which would cut the id).
line = fp.readline()
filename = line.strip() + '.xml'
# Parse this image's annotation and collect its <object> entries.
tree = xmlET.parse(os.path.join(file_path_xml, filename))
objs = tree.findall('object')
num_objs = len(objs)
# One row per object: four box coordinates followed by the class index.
boxes = np.zeros((num_objs, 5), dtype=np.uint16)
# Fill one row of `boxes` per annotated object.
for ix, obj in enumerate(objs):
    bbox = obj.find('bndbox')
    # Shift every coordinate by one so that pixel indexes are 0-based.
    x1, y1, x2, y2 = [
        float(bbox.find(tag).text) - 1
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    ]
    # The class label is the name's position in `classes`.
    cla = obj.find('name').text
    label = classes.index(cla)
    boxes[ix, :] = (x1, y1, x2, y2, label)
# The image and its mask share the annotation file's base name; only the
# directory and the extension differ.
image_name = os.path.splitext(filename)[0]
jpg_path = os.path.join(file_path_img, image_name + '.jpg')
png_path = os.path.join(file_path_mask, image_name + '.png')
# NOTE: cv2.imread signals an unreadable file by returning None, not by raising.
img = cv2.imread(jpg_path)
mask = cv2.imread(png_path)
for ix in range(len(boxes)