目录
1 动机
使用 mmsegmentation 训练模型时,为了避免重写数据加载模块,可以直接将自建数据集(采用 LabelMe 标注)转换为 mmsegmentation 已支持的数据集格式,例如 CityScape。
2 mmsegmentation支持的CityScape数据集格式
cityscapes
├── leftImg8bit
|   ├── train
|   |   ├── ***_leftImg8bit.png
|   |   ├── ...
|   ├── val
|   |   ├── ***_leftImg8bit.png
|   |   ├── ...
├── gtFine
|   ├── train
|   |   ├── ***_gtFine_labelTrainIds.png
|   |   ├── ***_gtFine_color.png
|   |   ├── ***_gtFine_polygons.json
|   |   ├── ...
|   ├── val
|   |   ├── ***_gtFine_labelTrainIds.png
|   |   ├── ***_gtFine_color.png
|   |   ├── ***_gtFine_polygons.json
|   |   ├── ...
3 LabelMe标注数据集格式
dataset
├── Annotations_json
| ├── ***.json
| ├── ...
├── JPEGImages
| ├── ***.jpg
| ├── ...
4 工程文件结构
project
├── dataset
|   ├── Annotations_json
|   |   ├── ***.json
|   |   ├── ...
|   ├── JPEGImages
|   |   ├── ***.jpg
|   |   ├── ...
├── cityscapescript
|   ├── __init__.py
|   ├── annotation.py
|   ├── json2labelImage.py
├── labelme2cityscape.py
├── split_dataset.py
4.1 分割训练集
- split_dataset.py
# !split_dataset.py
import os
import argparse
import random
from imutils import paths
def get_args():
    """Parse the command-line options of the dataset splitting script.

    Returns:
        The parsed namespace with ``label_dir`` (str, directory holding the
        LabelMe json files) and ``train_ratio`` (float, share of the samples
        assigned to the train list).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--label_dir",
        type=str,
        default="dataset/Annotations_json",
        help="The labelme labels directory.",
    )
    arg_parser.add_argument(
        "--train_ratio",
        type=float,
        default=0.7,
        help="Ratio of train set. The others are test sets.",
    )
    parsed = arg_parser.parse_args()
    return parsed
if __name__ == '__main__':
    # Script entry point: shuffle every LabelMe annotation found under
    # --label_dir and write the sample names (file name without extension)
    # to train_list.txt / test_list.txt in the current working directory.
    # NOTE: labelme2cityscape.py looks for these two lists inside its
    # --input directory, so they have to be placed there before converting.
    args = get_args()
    label_paths = sorted(paths.list_files(args.label_dir, "json"))
    random.shuffle(label_paths)

    label_num = len(label_paths)
    # The first `train_set_num` shuffled entries form the train split,
    # everything after them goes to the test split.
    train_set_num = int(label_num * args.train_ratio)

    # `with` guarantees that both list files are flushed and closed even if
    # a write fails half-way through.
    with open("train_list.txt", "w") as train_file, \
            open("test_list.txt", "w") as test_file:
        for i, label_path in enumerate(label_paths):
            print("[{}/{}]: {}".format(i + 1, label_num, label_path))
            # splitext only strips the final extension, so a stem containing
            # dots ("a.b.json" -> "a.b") still matches its image file name.
            filename = os.path.splitext(os.path.basename(label_path))[0]
            target_file = train_file if i < train_set_num else test_file
            target_file.write("{}\n".format(filename))
4.2 将LabelMe格式标注转换为CityScape格式标注
- labelme2cityscape.py
# !labelme2cityscape
import os
import cv2
import json
import shutil
import argparse
import numpy as np
from PIL import Image
from cityscapescript.annotation import Annotation
from cityscapescript.json2labelImage import createLabelImage
class MyEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes NumPy scalars and arrays."""

    def default(self, obj):
        """Convert NumPy values to built-in types; defer everything else.

        NumPy integers become ``int``, NumPy floats become ``float`` and
        arrays become (nested) lists. Any other object is handed to the base
        class implementation.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(MyEncoder, self).default(obj)
def get_args():
    """Parse the command-line options of the LabelMe to CityScape converter.

    Returns:
        The parsed namespace with ``input`` (str, source dataset directory)
        and ``output`` (str, directory the converted dataset is written to).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--input",
        type=str,
        default="dataset",
        help="input dataset directory.",
    )
    arg_parser.add_argument(
        "--output",
        type=str,
        default="cityscape",
        help="output dataset directory.",
    )
    parsed = arg_parser.parse_args()
    return parsed
def copy_image2output_dir(image_path, output_dir, filename):
    """Store an image in ``output_dir`` as ``<filename>_leftImg8bit.png``.

    Args:
        image_path: Path of the source image.
        output_dir: Directory that receives the PNG file (not created here).
        filename: Sample name used as prefix of the output file name.
    """
    new_image_path = os.path.sep.join([output_dir, "{}_leftImg8bit.png".format(filename)])
    # Compare the extension case-insensitively so that "*.PNG" files are
    # copied verbatim too instead of being decoded and re-encoded.
    image_type = os.path.splitext(image_path)[1].lower()
    if image_type == ".png":
        shutil.copy(image_path, new_image_path)
    else:
        # The context manager releases the source file handle as soon as the
        # converted copy has been written.
        with Image.open(image_path) as image:
            image.save(new_image_path)
def deal_json(json_file_path):
    """Convert one LabelMe annotation file into a CityScape-style dict.

    Args:
        json_file_path: Path of a LabelMe ``*.json`` annotation file.

    Returns:
        A dict with the keys ``imgHeight``, ``imgWidth`` and ``objects``
        (one ``{'label': ..., 'polygon': ...}`` entry per polygon shape),
        or ``None`` when the path does not end with ``.json``.
    """
    if not json_file_path.endswith('.json'):
        print('Cannot generating dataset from:', json_file_path)
        return None

    # JSON text is UTF-8; reading it with the platform default encoding can
    # fail on non-ASCII labels.
    with open(json_file_path, encoding='utf-8') as f:
        print('Generating dataset from:', json_file_path)
        data = json.load(f)

    objects = []
    for shape in data['shapes']:
        # Only polygons are converted. Other shape types are skipped because
        # an entry without a 'polygon' key cannot be loaded as a polygon
        # object afterwards.
        if shape['shape_type'] != 'polygon':
            continue
        objects.append({'label': shape['label'], 'polygon': shape['points']})

    return {
        'imgHeight': data['imageHeight'],
        'imgWidth': data['imageWidth'],
        'objects': objects,
    }
def generate_cityscape_label(label_path, output_dir, filename, color_map):
    """Write the CityScape-style label files for one LabelMe annotation.

    Three files are created in ``output_dir``:
    ``<filename>_gtFine_polygons.json``, ``<filename>_gtFine_labelTrainIds.png``
    and ``<filename>_gtFine_color.png``.

    Args:
        label_path: Path of the LabelMe json annotation.
        output_dir: Directory that receives the generated files.
        filename: Sample name used as prefix of the output file names.
        color_map: Label lookup table forwarded to ``createLabelImage``.

    Raises:
        ValueError: If ``label_path`` is not a ``.json`` file, i.e.
            ``deal_json`` returned ``None``.
    """
    # json label
    new_label_path = os.path.sep.join([output_dir, "{}_gtFine_polygons.json".format(filename)])
    data_cs = deal_json(label_path)
    if data_cs is None:
        # Fail early instead of writing a "null" document that cannot be
        # loaded as an annotation.
        raise ValueError("Not a LabelMe json file. {}".format(label_path))
    # The file is read back right below, so it must be flushed and closed
    # before `fromJsonFile` opens it.
    with open(new_label_path, 'w') as json_file:
        json.dump(data_cs, json_file, indent=4, cls=MyEncoder)

    annotation = Annotation()
    annotation.fromJsonFile(new_label_path)

    # trainIds
    trainIds_image_path = os.path.sep.join([output_dir, "{}_gtFine_labelTrainIds.png".format(filename)])
    trainIds_image = createLabelImage(annotation, "trainIds", color_map)
    trainIds_image.save(trainIds_image_path)

    # color
    color_image_path = os.path.sep.join([output_dir, "{}_gtFine_color.png".format(filename)])
    color_image = createLabelImage(annotation, "color", color_map)
    color_image.save(color_image_path)
def deal_dataset(args, dataset_type, color_map):
    """Convert one split of the LabelMe dataset into the CityScape layout.

    Reads the sample names from ``<args.input>/<dataset_type>_list.txt`` and,
    for every sample, copies the image to
    ``<args.output>/leftImg8bit/<dataset_type>`` and generates the label
    files in ``<args.output>/gtFine/<dataset_type>``.

    Args:
        args: Namespace providing the ``input`` and ``output`` directories.
        dataset_type: ``"train"`` or ``"test"``.
        color_map: Label lookup table forwarded to the label generation.

    Raises:
        FileNotFoundError: If the list file does not exist (this includes an
            unsupported ``dataset_type``) or a listed sample has neither a
            jpg nor a png image.
    """
    image_save_dir = os.path.sep.join([args.output, "leftImg8bit", dataset_type])
    label_save_dir = os.path.sep.join([args.output, "gtFine", dataset_type])
    os.makedirs(image_save_dir, exist_ok=True)
    os.makedirs(label_save_dir, exist_ok=True)

    list_file_path = ""
    if dataset_type in ("train", "test"):
        list_file_path = os.path.sep.join([args.input, "{}_list.txt".format(dataset_type)])
    if not os.path.exists(list_file_path):
        # Python 3 only allows raising exception instances, not plain strings.
        raise FileNotFoundError("File not exist. {}".format(list_file_path))

    with open(list_file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise be treated as a sample named "".
                continue

            # Build both candidates from the stem; replacing "jpg" inside the
            # whole path would also rewrite directory or sample names.
            image_stem = os.path.sep.join([args.input, "JPEGImages", line])
            image_path = image_stem + ".jpg"
            if not os.path.exists(image_path):
                image_path = image_stem + ".png"
            if not os.path.exists(image_path):
                raise FileNotFoundError(
                    "Image file not exist. Only support png and jpg file. {}".format(image_path))

            label_path = os.path.sep.join([args.input, "Annotations_json", "{}.json".format(line)])
            print(image_path, label_path)
            # copy image to output directory
            copy_image2output_dir(image_path, image_save_dir, line)
            # generate label to output directory
            generate_cityscape_label(label_path, label_save_dir, line, color_map)
if __name__ == '__main__':
    # Script entry point: convert the "train" and "test" splits of a LabelMe
    # dataset into the CityScape directory layout.
    args = get_args()

    # Make sure the input directory exists.
    if not os.path.exists(args.input):
        # Python 3 only allows raising exception instances, not plain strings.
        raise FileNotFoundError("Directory or file not exist. {}".format(args.input))

    # Start from an empty output directory.
    if os.path.exists(args.output):
        shutil.rmtree(args.output)

    # Read the color map. Each non-empty line holds five whitespace separated
    # fields: label name, id value, and the three color components.
    color_map = {}
    with open(os.path.sep.join([args.input, "label_color_map.txt"])) as f:
        for line in f:
            # split() without argument tolerates tabs, repeated spaces and
            # blank lines, all of which broke the former split(" ").
            data = line.split()
            if not data:
                continue
            color_map[data[0]] = [int(data[1]), (int(data[2]), int(data[3]), int(data[4]))]

    # Process both splits.
    for dataset_type in ("train", "test"):
        deal_dataset(args, dataset_type, color_map)
- annotation.py
#!/usr/bin/python
#
# Classes to store, read, and write annotations
#
from __future__ import print_function, absolute_import, division
import os
import json
import numpy as np
from collections import namedtuple
# get current date and time
import datetime
import locale
from abc import ABCMeta, abstractmethod
# from box3dImageTransform import Camera
class Camera(object):
    """Plain container for the camera calibration values of an image.

    All constructor arguments are stored unchanged as attributes of the same
    name; ``imgWidth`` and ``imgHeight`` default to 2048 and 1024.
    """

    def __init__(self, fx, fy, u0, v0, sensor_T_ISO_8855,
                 imgWidth=2048, imgHeight=1024):
        self.fx, self.fy = fx, fy
        self.u0, self.v0 = u0, v0
        self.sensor_T_ISO_8855 = sensor_T_ISO_8855
        self.imgWidth, self.imgHeight = imgWidth, imgHeight
# A point (vertex) of a polygon: a namedtuple with the fields `x` and `y`
Point = namedtuple('Point', ['x', 'y'])
class CsObjectType:
    """Integer codes identifying the kind of an annotation object."""

    # polygon
    POLY = 1
    # 2d bounding box
    BBOX2D = 2
    # 3d bounding box
    BBOX3D = 3
    # 2d ignore region
    IGNORE2D = 4
class CsObject:
    """Abstract base class for annotation objects.

    Holds the bookkeeping fields shared by every annotation type (label,
    deleted/verified flags, date, user, draw flag) and declares the
    string/JSON conversion methods that subclasses implement.
    """
    # NOTE: `__metaclass__` is the Python 2 spelling. Python 3 ignores it, so
    # the abstract methods below are not enforced there.
    __metaclass__ = ABCMeta

    def __init__(self, objType):
        """Initialize the shared fields; ``objType`` is stored as ``objectType``."""
        self.objectType = objType
        # the label
        self.label = ""

        # If deleted or not
        self.deleted = 0
        # If verified or not
        self.verified = 0
        # The date string
        self.date = ""
        # The username
        self.user = ""
        # Draw the object
        # Not read from or written to JSON
        # Set to False if deleted object
        # Might be set to False by the application for other reasons
        self.draw = True

    @abstractmethod
    def __str__(self): pass

    @abstractmethod
    def fromJsonText(self, jsonText, objId=-1): pass

    @abstractmethod
    def toJsonText(self): pass

    def updateDate(self):
        """Set ``self.date`` to the current time as ``%d-%b-%Y %H:%M:%S``.

        An English locale is selected first on a best-effort basis; when none
        of the candidate names is available the current locale is kept.
        """
        # Try the candidates one by one. Chaining several `except
        # locale.Error` clauses on a single `try` only ever reaches the first
        # clause, and a failing fallback inside it escaped uncaught.
        for localeName in ('en_US.utf8', 'en_US', 'us_us.utf8', 'us_us'):
            try:
                locale.setlocale(locale.LC_ALL, localeName)
            except locale.Error:
                continue
            break
        self.date = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S")

    # Mark the object as deleted
    def delete(self):
        """Flag the object as deleted and exclude it from drawing."""
        self.deleted = 1
        self.draw = False
class CsPoly(CsObject):
    """A single annotated object described by a polygon."""

    def __init__(self):
        """Create an empty polygon object with id -1."""
        CsObject.__init__(self, CsObjectType.POLY)
        # the polygon as list of points
        self.polygon = []
        # the object ID
        self.id = -1

    def __str__(self):
        """Return the label followed by the points (abbreviated above 4 points)."""
        if not self.polygon:
            polyText = "none"
        elif len(self.polygon) <= 4:
            polyText = "".join(
                '({},{}) '.format(p.x, p.y) for p in self.polygon)
        else:
            # Long polygons only show their first two and last two points
            head0, head1 = self.polygon[0], self.polygon[1]
            tail0, tail1 = self.polygon[-2], self.polygon[-1]
            polyText = '({},{}) ({},{}) ... ({},{}) ({},{})'.format(
                head0.x, head0.y, head1.x, head1.y,
                tail0.x, tail0.y, tail1.x, tail1.y)
        return "Object: {} - {}".format(self.label, polyText)

    def fromJsonText(self, jsonText, objId=-1):
        """Fill the object from a dict with 'label' and 'polygon' entries.

        The optional entries 'deleted', 'verified', 'user' and 'date' fall
        back to 0, 1, '' and '' respectively when missing.
        """
        self.id = objId
        self.label = str(jsonText['label'])
        self.polygon = [Point(p[0], p[1]) for p in jsonText['polygon']]
        self.deleted = jsonText.get('deleted', 0)
        self.verified = jsonText.get('verified', 1)
        self.user = jsonText.get('user', '')
        self.date = jsonText.get('date', '')
        # Deleted objects are not drawn
        self.draw = self.deleted != 1

    def toJsonText(self):
        """Return the object as a dict; points become ``[x, y]`` lists."""
        return {
            'label': self.label,
            'id': self.id,
            'deleted': self.deleted,
            'verified': self.verified,
            'user': self.user,
            'date': self.date,
            'polygon': [[pt.x, pt.y] for pt in self.polygon],
        }
class CsBbox2d(CsObject):
    """A single annotated object described by 2d bounding boxes."""

    def __init__(self):
        """Create an object with empty boxes, instance id -1 and empty label."""
        CsObject.__init__(self, CsObjectType.BBOX2D)
        # the boxes, stored as [x, y, w, h]
        self.bbox_amodal_xywh = []
        self.bbox_modal_xywh = []

        # the ID of the corresponding object
        self.instanceId = -1
        # the label of the corresponding object
        self.label = ""

    def __str__(self):
        """Return the label together with the amodal and the modal box."""
        boxTemplate = '[(x1: {}, y1: {}), (w: {}, h: {})]'
        amodal = self.bbox_amodal_xywh
        bboxAmodalText = boxTemplate.format(
            amodal[0], amodal[1], amodal[2], amodal[3])
        modal = self.bbox_modal_xywh
        bboxModalText = boxTemplate.format(
            modal[0], modal[1], modal[2], modal[3])
        return "Object: {}\n - Amodal {}\n - Modal {}".format(
            self.label, bboxAmodalText, bboxModalText)

    def setAmodalBox(self, bbox_amodal):
        """Store an ``[xmin, ymin, xmax, ymax]`` box as amodal ``[x, y, w, h]``."""
        xmin, ymin = bbox_amodal[0], bbox_amodal[1]
        width = bbox_amodal[2] - bbox_amodal[0]
        height = bbox_amodal[3] - bbox_amodal[1]
        self.bbox_amodal_xywh = [xmin, ymin, width, height]

    # access 2d boxes in [xmin, ymin, xmax, ymax] format
    @property
    def bbox_amodal(self):
        """Returns the amodal 2d box as [xmin, ymin, xmax, ymax]"""
        box = self.bbox_amodal_xywh
        return [box[0], box[1], box[0] + box[2], box[1] + box[3]]

    @property
    def bbox_modal(self):
        """Returns the modal 2d box as [xmin, ymin, xmax, ymax]"""
        box = self.bbox_modal_xywh
        return [box[0], box[1], box[0] + box[2], box[1] + box[3]]

    def fromJsonText(self, jsonText, objId=-1):
        """Load the boxes (and, if both are given, label and instance id).

        Accepted layouts are 'bbox'/'bboxVis', 'modal'/'amodal', or 'amodal'
        alone, in which case the amodal box is used for both boxes.
        """
        if 'bbox' in jsonText and 'bboxVis' in jsonText:
            # cityperson format
            self.bbox_amodal_xywh = jsonText['bbox']
            self.bbox_modal_xywh = jsonText['bboxVis']
        elif "modal" in jsonText and "amodal" in jsonText:
            # both modal and amodal boxes are provided
            self.bbox_amodal_xywh = jsonText['amodal']
            self.bbox_modal_xywh = jsonText['modal']
        else:
            # only amodal boxes are provided
            self.bbox_modal_xywh = jsonText['amodal']
            self.bbox_amodal_xywh = jsonText['amodal']

        # label and instanceId are only taken over when both are available
        if 'label' in jsonText and 'instanceId' in jsonText:
            self.label = str(jsonText['label'])
            self.instanceId = jsonText['instanceId']

    def toJsonText(self):
        """Return label, instance id and both boxes as a dict."""
        return {
            'label': self.label,
            'instanceId': self.instanceId,
            'modal': self.bbox_modal_xywh,
            'amodal': self.bbox_amodal_xywh,
        }
class CsBbox3d(CsObject):
    """Class that contains the information of a single annotated object as 3D bounding box"""

    # Constructor
    def __init__(self):
        """Create an object without boxes, with instance id -1 and score -1."""
        CsObject.__init__(self, CsObjectType.BBOX3D)

        # the associated 2d box (a CsBbox2d once loaded)
        self.bbox_2d = None

        # 3d box parameters as read from the '3d' entry of the JSON dict
        self.center = []
        self.dims = []
        self.rotation = []
        self.instanceId = -1
        self.label = ""
        self.score = -1.

    def __str__(self):
        """Return the label followed by the 2d box and the 3d box parameters."""
        bbox2dText = str(self.bbox_2d)

        bbox3dText = ""
        bbox3dText += '\n - Center (x/y/z) [m]: {}/{}/{}'.format(
            self.center[0], self.center[1], self.center[2])
        bbox3dText += '\n - Dimensions (l/w/h) [m]: {}/{}/{}'.format(
            self.dims[0], self.dims[1], self.dims[2])
        bbox3dText += '\n - Rotation: {}/{}/{}/{}'.format(
            self.rotation[0], self.rotation[1], self.rotation[2], self.rotation[3])

        text = "Object: {}\n2D {}\n - 3D {}".format(
            self.label, bbox2dText, bbox3dText)
        return text

    def fromJsonText(self, jsonText, objId=-1):
        """Load the object from a dict with '2d', '3d', 'label' and 'score'.

        'instanceId' is optional and keeps its current value when missing.
        """
        # load 2D box
        self.bbox_2d = CsBbox2d()
        self.bbox_2d.fromJsonText(jsonText['2d'])

        self.center = jsonText['3d']['center']
        self.dims = jsonText['3d']['dimensions']
        self.rotation = jsonText['3d']['rotation']
        self.label = jsonText['label']
        self.score = jsonText['score']

        if 'instanceId' in jsonText.keys():
            self.instanceId = jsonText['instanceId']

    def toJsonText(self):
        """Return the object as a dict that ``fromJsonText`` can read back."""
        # The nested '2d' / '3d' dicts have to be created explicitly;
        # assigning into objDict['2d'][...] on an empty dict raises KeyError.
        # 'score' is included because fromJsonText requires it.
        objDict = {
            'label': self.label,
            'instanceId': self.instanceId,
            'score': self.score,
            '2d': {
                'amodal': self.bbox_2d.bbox_amodal_xywh,
                'modal': self.bbox_2d.bbox_modal_xywh,
            },
            '3d': {
                'center': self.center,
                'dimensions': self.dims,
                'rotation': self.rotation,
            },
        }
        return objDict

    @property
    def depth(self):
        """Return sqrt(center[0]**2 + center[1]**2) truncated to an integer."""
        # returns the BEV depth
        return np.sqrt(self.center[0]**2 + self.center[1]**2).astype(int)
class CsIgnore2d(CsObject):
    """A single annotated 2d ignore region."""

    def __init__(self):
        """Create an ignore region with empty box, empty label and id -1."""
        CsObject.__init__(self, CsObjectType.IGNORE2D)
        # the region, stored as [x, y, w, h]
        self.bbox_xywh = []
        self.label = ""
        self.instanceId = -1

    def __str__(self):
        """Return the region's corner and size as text."""
        box = self.bbox_xywh
        return 'Ignore Region: (x1: {}, y1: {}), (w: {}, h: {})'.format(
            box[0], box[1], box[2], box[3])

    def fromJsonText(self, jsonText, objId=-1):
        """Load the box from the '2d' entry; label and instance id are optional."""
        self.bbox_xywh = jsonText['2d']

        if 'label' in jsonText:
            self.label = jsonText['label']

        if 'instanceId' in jsonText:
            self.instanceId = jsonText['instanceId']

    def toJsonText(self):
        """Return label, instance id and the box as a dict."""
        return {
            'label': self.label,
            'instanceId': self.instanceId,
            '2d': self.bbox_xywh,
        }

    @property
    def bbox(self):
        """Returns the 2d box as [xmin, ymin, xmax, ymax]"""
        box = self.bbox_xywh
        return [box[0], box[1], box[0] + box[2], box[1] + box[3]]

    # The two read-only aliases below mirror the attribute names of CsBbox2d
    @property
    def bbox_amodal_xywh(self):
        """Alias of ``bbox_xywh``."""
        return self.bbox_xywh

    @property
    def bbox_modal_xywh(self):
        """Alias of ``bbox_xywh``."""
        return self.bbox_xywh
class Annotation:
    """Annotation of one whole image.

    Mixed annotations (e.g. combining CsPoly and CsBbox2d) are not supported:
    every entry of 'objects' is loaded as the single object type chosen at
    construction time.
    """

    def __init__(self, objType=CsObjectType.POLY):
        """Create an empty annotation whose objects are of type ``objType``."""
        # width and height of the image and thus of the label image
        self.imgWidth = 0
        self.imgHeight = 0
        # the list of objects
        self.objects = []
        # the camera calibration
        self.camera = None

        assert objType in CsObjectType.__dict__.values()
        self.objectType = objType

    def toJson(self):
        """Serialize the annotation (attribute dicts, sorted keys) to a JSON string."""
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    def fromJsonText(self, jsonText):
        """Populate the annotation from a JSON string.

        Reads 'imgWidth', 'imgHeight' and, unless the annotation type is
        IGNORE2D, the 'objects' list. Optional 'ignore' entries are appended
        as CsIgnore2d objects and an optional 'sensor' entry becomes the
        camera.
        """
        parsed = json.loads(jsonText)
        self.imgWidth = int(parsed['imgWidth'])
        self.imgHeight = int(parsed['imgHeight'])
        self.objects = []

        # regular objects ('objects' is only accessed when they are wanted)
        regularEntries = (
            parsed['objects'] if self.objectType != CsObjectType.IGNORE2D else [])
        for objId, objIn in enumerate(regularEntries):
            if self.objectType == CsObjectType.POLY:
                obj = CsPoly()
            elif self.objectType == CsObjectType.BBOX2D:
                obj = CsBbox2d()
            elif self.objectType == CsObjectType.BBOX3D:
                obj = CsBbox3d()
            obj.fromJsonText(objIn, objId)
            self.objects.append(obj)

        # ignore regions
        for ignoreId, ignoreIn in enumerate(parsed.get('ignore', [])):
            ignoreObj = CsIgnore2d()
            ignoreObj.fromJsonText(ignoreIn, ignoreId)
            self.objects.append(ignoreObj)

        # camera calibration
        if 'sensor' in parsed:
            sensor = parsed['sensor']
            self.camera = Camera(
                fx=sensor['fx'],
                fy=sensor['fy'],
                u0=sensor['u0'],
                v0=sensor['v0'],
                sensor_T_ISO_8855=sensor['sensor_T_ISO_8855'])

    def toJsonText(self):
        """Return image size and the objects' dicts as one dict."""
        return {
            'imgWidth': self.imgWidth,
            'imgHeight': self.imgHeight,
            'objects': [obj.toJsonText() for obj in self.objects],
        }

    def fromJsonFile(self, jsonFile):
        """Read a json file into this annotation.

        A missing file is only reported with a message; the annotation is
        left unchanged in that case.
        """
        if os.path.isfile(jsonFile):
            with open(jsonFile, 'r') as f:
                self.fromJsonText(f.read())
            return
        print('Given json file not found: {}'.format(jsonFile))

    def toJsonFile(self, jsonFile):
        """Write the result of ``toJson`` to ``jsonFile``."""
        serialized = self.toJson()
        with open(jsonFile, 'w') as f:
            f.write(serialized)
# a dummy example
if __name__ == "__main__":
    # Build a unit-square polygon labelled 'car', then print the class name
    # and the object's text representation.
    obj = CsPoly()
    obj.label = 'car'
    obj.polygon.extend(
        Point(x, y) for x, y in ((0, 0), (1, 0), (1, 1), (0, 1)))

    print(type(obj).__name__)
    print(obj)
- json2labelImage.py
# Image processing
from PIL import Image
from PIL import ImageDraw
# Convert the given annotation to a label image
def createLabelImage(annotation, encoding, color_map, outline=None):
    """Draw the polygons of an annotation into a new label image.

    Args:
        annotation: Object providing ``imgWidth``, ``imgHeight`` and
            ``objects``; every object provides ``label``, ``polygon`` and
            ``deleted``.
        encoding: ``"ids"`` or ``"trainIds"`` for a single-channel ("L")
            image filled with ``color_map[label][0]``, or ``"color"`` for an
            "RGBA" image filled with ``color_map[label][1]``.
        color_map: Lookup table indexed by label as described above.
        outline: Optional outline passed to the polygon drawing call when
            truthy.

    Returns:
        The created image, or ``None`` (after printing a message) when
        ``encoding`` is unknown.
    """
    # the size of the image
    size = (annotation.imgWidth, annotation.imgHeight)

    # background value per supported encoding
    backgrounds = {"ids": 0, "trainIds": 255, "color": (0, 0, 0)}
    if encoding not in backgrounds:
        print("Unknown encoding '{}'".format(encoding))
        return None

    wantsColor = encoding == "color"
    labelImg = Image.new("RGBA" if wantsColor else "L", size, backgrounds[encoding])
    drawer = ImageDraw.Draw(labelImg)

    # position of the fill value inside a color_map entry
    valueIndex = 1 if wantsColor else 0

    for obj in annotation.objects:
        label = obj.label
        polygon = obj.polygon

        # deleted objects are not drawn
        if obj.deleted:
            continue

        val = color_map[label][valueIndex]

        drawOptions = {'fill': val}
        if outline:
            drawOptions['outline'] = outline
        try:
            drawer.polygon(polygon, **drawOptions)
        except BaseException:
            # report which label failed, then let the error propagate
            print("Failed to draw polygon with label {}".format(label))
            raise

    return labelImg