Many frameworks support Mask R-CNN, and there are plenty of tutorials for training it online, for example instance-segmentation training with TensorFlow + Keras. However, the .h5 model produced that way is hard to load through OpenCV's built-in interface. After looking into it, I found that starting with OpenCV 4.0 the dnn module can load a Mask R-CNN model in .pb format.
This is a short record of training Mask R-CNN on my own data with the training interface that comes with the TensorFlow Object Detection API, kept here for future reference.
1. Download the TensorFlow models source code
The Object Detection API lives under tensorflow/models; repository: https://github.com/tensorflow/models
git clone https://github.com/tensorflow/models.git
2. Install the API
See https://blog.youkuaiyun.com/c20081052/article/details/81698216 for details.
3. Data collection
Annotating data for instance segmentation is fairly tedious. I recommend labeling with labelme; installation and usage of the tool are covered in this post: https://blog.youkuaiyun.com/u011574296/article/details/79740633/
Note that for instance-segmentation training, instances of the same class must be given distinct labels. For example, for a scene with 4 dogs and 2 people, use the labels dog1, dog2, dog3, dog4, person1, person2. (If you do not distinguish them, the model will treat different individuals of the same class as a single object and will not be able to separate instances.)
After annotation you will find the annotation files (JSON files) generated in the save directory.
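For reference, each labelme JSON mainly contains an imagePath field and a list of shapes, where each shape holds a label and its polygon points. A minimal sketch for inspecting one annotation file (the file name below is just a hypothetical example):
import json

# Hypothetical path to one labelme annotation file.
annotation_path = './datasets/annotations/0001.json'

with open(annotation_path, 'r') as f:
    annotation = json.load(f)

# 'imagePath' points back to the annotated image; 'shapes' holds the polygons.
print('image:', annotation.get('imagePath'))
for shape in annotation.get('shapes', []):
    # Each shape has a 'label' (e.g. dog1) and a list of [x, y] points.
    print(shape['label'], 'with', len(shape['points']), 'polygon points')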
4. Data conversion
The API's training interface consumes data in TFRecord format, so we need to convert the labelme JSON annotations to TFRecord.
The code for create_tf_record.py is as follows:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 26 10:57:09 2018
@author: shirhe-lyh
"""
"""Convert raw dataset to TFRecord for object_detection.
Please note that this tool only applies to labelme's annotations(json file).
Example usage:
python3 create_tf_record.py \
--images_dir=your absolute path to read images.
        --annotations_json_dir=your path to annotation json files.
--label_map_path=your path to label_map.pbtxt
--output_path=your path to write .record.
"""
import cv2
import glob
import hashlib
import io
import json
import numpy as np
import os
import PIL.Image
import tensorflow as tf
import read_pbtxt_file
flags = tf.app.flags
flags.DEFINE_string('images_dir', None, 'Path to images directory.')
flags.DEFINE_string('annotations_json_dir', 'datasets/annotations',
'Path to annotations directory.')
flags.DEFINE_string('label_map_path', None, 'Path to label map proto.')
flags.DEFINE_string('output_path', None, 'Path to the output tfrecord.')
FLAGS = flags.FLAGS
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def create_tf_example(annotation_dict, label_map_dict=None):
"""Converts image and annotations to a tf.Example proto.
Args:
annotation_dict: A dictionary containing the following keys:
['height', 'width', 'filename', 'sha256_key', 'encoded_jpg',
'format', 'xmins', 'xmaxs', 'ymins', 'ymaxs', 'masks',
'class_names'].
        label_map_dict: A dictionary mapping class_names to indices.
Returns:
example: The converted tf.Example.
Raises:
ValueError: If label_map_dict is None or is not containing a class_name.
"""
if annotation_dict is None:
return None
if label_map_dict is None:
raise ValueError('`label_map_dict` is None')
height = annotation_dict.get('height', None)
width = annotation_dict.get('width', None)
filename = annotation_dict.get('filename', None)
sha256_key = annotation_dict.get('sha256_key', None)
encoded_jpg = annotation_dict.get('encoded_jpg', None)
image_format = annotation_dict.get('format', None)
xmins = annotation_dict.get('xmins', None)
xmaxs = annotation_dict.get('xmaxs', None)
ymins = annotation_dict.get('ymins', None)
ymaxs = annotation_dict.get('ymaxs', None)
masks = annotation_dict.get('masks', None)
class_names = annotation_dict.get('class_names', None)
labels = []
for class_name in class_names:
        # Use None as the default so a missing class name is actually caught.
        label = label_map_dict.get(class_name, None)
        if label is None:
            raise ValueError('`label_map_dict` is not containing {}.'.format(
                class_name))
labels.append(label)
encoded_masks = []
for mask in masks:
pil_image = PIL.Image.fromarray(mask.astype(np.uint8))
output_io = io.BytesIO()
pil_image.save(output_io, format='PNG')
encoded_masks.append(output_io.getvalue())
feature_dict = {
'image/height': int64_feature(height),
'image/width': int64_feature(width),
'image/filename': bytes_feature(filename.encode('utf8')),
'image/source_id': bytes_feature(filename.encode('utf8')),
'image/key/sha256': bytes_feature(sha256_key.encode('utf8')),
'image/encoded': bytes_feature(encoded_jpg),
'image/format': bytes_feature(image_format.encode('utf8')),
'image/object/bbox/xmin': float_list_feature(xmins),
'image/object/bbox/xmax': float_list_feature(xmaxs),
'image/object/bbox/ymin': float_list_feature(ymins),
'image/object/bbox/ymax': float_list_feature(ymaxs),
'image/object/mask': bytes_list_feature(encoded_masks),
'image/object/class/label': int64_list_feature(labels)}
example = tf.train.Example(features=tf.train.Features(
feature=feature_dict))
return example
def _get_annotation_dict(images_dir, annotation_json_path):
"""Get boundingboxes and masks.
Args:
images_dir: Path to images directory.
annotation_json_path: Path to annotated json file corresponding to
the image. The json file annotated by labelme with keys:
['lineColor', 'imageData', 'fillColor', 'imagePath', 'shapes',
'flags'].
Returns:
annotation_dict: A dictionary containing the following keys:
['height', 'width', 'filename', 'sha256_key', 'encoded_jpg',
'format', 'xmins', 'xmaxs', 'ymins', 'ymaxs', 'masks',
'class_names'].
#
# Raises:
# ValueError: If images_dir or annotation_json_path is not exist.
"""
# if not os.path.exists(images_dir):
# raise ValueError('`images_dir` is not exist.')
#
# if not os.path.exists(annotation_json_path):
# raise ValueError('`annotation_json_path` is not exist.')
if (not os.path.exists(images_dir) or
not os.path.exists(annotation_json_path)):
return None
with open(annotation_json_path, 'r') as f:
json_text = json.load(f)
shapes = json_text.get('shapes', None)
if shapes is None:
return None
image_relative_path = json_text.get('imagePath', None)
if image_relative_path is None:
return None
image_name = image_relative_path.split('/')[-1]
image_path = os.path.join(images_dir, image_name)
image_format = image_name.split('.')[-1].replace('jpg', 'jpeg')
if not os.path.exists(image_path):
return None
with tf.gfile.GFile(image_path, 'rb') as fid:
encoded_jpg = fid.read()
image = cv2.imread(image_path)
height = image.shape[0]
width = image.shape[1]
key = hashlib.sha256(encoded_jpg).hexdigest()
xmins = []
xmaxs = []
ymins = []
ymaxs = []
masks = []
class_names = []
hole_polygons = []
for mark in shapes:
class_name = mark.get('label')
class_names.append(class_name)
polygon = mark.get('points')
        # cv2.fillPoly expects integer point coordinates.
        polygon = np.array(polygon, dtype=np.int32)
        if class_name == 'hole':
            hole_polygons.append(polygon)
        else:
            mask = np.zeros(image.shape[:2], dtype=np.uint8)
            cv2.fillPoly(mask, [polygon], 1)
masks.append(mask)
# Boundingbox
x = polygon[:, 0]
y = polygon[:, 1]
xmin = np.min(x)
xmax = np.max(x)
ymin = np.min(y)
ymax = np.max(y)
xmins.append(float(xmin) / width)
xmaxs.append(float(xmax) / width)
ymins.append(float(ymin) / height)
ymaxs.append(float(ymax) / height)
# Remove holes in mask
for mask in masks:
mask = cv2.fillPoly(mask, hole_polygons, 0)
annotation_dict = {'height': height,
'width': width,
'filename': image_name,
'sha256_key': key,
'encoded_jpg': encoded_jpg,
'format': image_format,
'xmins': xmins,
'xmaxs': xmaxs,
'ymins': ymins,
'ymaxs': ymaxs,
'masks': masks,
'class_names': class_names}
return annotation_dict
def main(_):
if not os.path.exists(FLAGS.images_dir):
raise ValueError('`images_dir` is not exist.')
if not os.path.exists(FLAGS.annotations_json_dir):
raise ValueError('`annotations_json_dir` is not exist.')
if not os.path.exists(FLAGS.label_map_path):
raise ValueError('`label_map_path` is not exist.')
label_map = read_pbtxt_file.get_label_map_dict(FLAGS.label_map_path)
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
num_annotations_skiped = 0
annotations_json_path = os.path.join(FLAGS.annotations_json_dir, '*.json')
for i, annotation_file in enumerate(glob.glob(annotations_json_path)):
        if i % 100 == 0:
            print('On image %d' % i)
annotation_dict = _get_annotation_dict(
FLAGS.images_dir, annotation_file)
if annotation_dict is None:
num_annotations_skiped += 1
continue
tf_example = create_tf_example(annotation_dict, label_map)
        writer.write(tf_example.SerializeToString())
    # Make sure everything is flushed to disk before reporting success.
    writer.close()
    print('Successfully created TFRecord to {}.'.format(FLAGS.output_path))
if __name__ == '__main__':
tf.app.run()
Then, in the same directory, create read_pbtxt_file.py:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 26 13:42:50 2018
@author: shirhe-lyh
"""
"""A tool to read .pbtxt file.
See Details at:
    TensorFlow models/research/object_detection/protos/string_int_label_map_pb2.py
TensorFlow models/research/object_detection/utils/label_map_util.py
"""
import tensorflow as tf
from google.protobuf import text_format
import string_int_label_map_pb2
def load_pbtxt_file(path):
"""Read .pbtxt file.
Args:
path: Path to StringIntLabelMap proto text file (.pbtxt file).
Returns:
A StringIntLabelMapProto.
Raises:
ValueError: If path is not exist.
"""
if not tf.gfile.Exists(path):
raise ValueError('`path` is not exist.')
with tf.gfile.GFile(path, 'r') as fid:
pbtxt_string = fid.read()
pbtxt = string_int_label_map_pb2.StringIntLabelMap()
try:
text_format.Merge(pbtxt_string, pbtxt)
except text_format.ParseError:
pbtxt.ParseFromString(pbtxt_string)
return pbtxt
def get_label_map_dict(path):
"""Reads a .pbtxt file and returns a dictionary.
Args:
path: Path to StringIntLabelMap proto text file.
Returns:
A dictionary mapping class names to indices.
"""
pbtxt = load_pbtxt_file(path)
result_dict = {}
for item in pbtxt.item:
result_dict[item.name] = item.id
return result_dict
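Not part of the original scripts, but as a quick sanity check, get_label_map_dict can be called directly on the label map file described further down; with the Abyssinian_label_map.pbtxt shown below it should return a plain name-to-id dictionary:
import read_pbtxt_file

# Quick check of the parser; adjust the path to your own label map file.
label_map = read_pbtxt_file.get_label_map_dict(
    './training/Abyssinian_label_map.pbtxt')
print(label_map)  # e.g. {'Abyssinian': 1, 'cat': 2, 'dog': 3}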
Next, create the file string_int_label_map_pb2.py:
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: object_detection/protos/string_int_label_map.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='object_detection/protos/string_int_label_map.proto',
package='object_detection.protos',
syntax='proto2',
serialized_options=None,
serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem')
)
_STRINGINTLABELMAPITEM = _descriptor.Descriptor(
name='StringIntLabelMapItem',
full_name='object_detection.protos.StringIntLabelMapItem',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=150,
)
_STRINGINTLABELMAP = _descriptor.Descriptor(
name='StringIntLabelMap',
full_name='object_detection.protos.StringIntLabelMap',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=152,
serialized_end=233,
)
_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM
DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM
DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict(
DESCRIPTOR = _STRINGINTLABELMAPITEM,
__module__ = 'object_detection.protos.string_int_label_map_pb2'
# @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem)
))
_sym_db.RegisterMessage(StringIntLabelMapItem)
StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict(
DESCRIPTOR = _STRINGINTLABELMAP,
__module__ = 'object_detection.protos.string_int_label_map_pb2'
# @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap)
))
_sym_db.RegisterMessage(StringIntLabelMap)
# @@protoc_insertion_point(module_scope)
Running the command below converts the data into a TFRecord file. Here --images_dir is the folder of training images, --annotations_json_dir is the folder of labelme JSON annotations, --label_map_path is the label map file for the training data, and --output_path is the TFRecord file to be generated.
python3 create_tf_record.py \
    --images_dir=./datasets/images \
    --annotations_json_dir=./datasets/annotations \
    --label_map_path=./training/Abyssinian_label_map.pbtxt \
    --output_path=./test.record
The label map file for the training data (Abyssinian_label_map.pbtxt in my project) looks like this:
item {
id: 1
name: 'Abyssinian'
}
item {
id: 2
name: 'cat'
}
item {
id: 3
name: 'dog'
}
The number of label classes and their names can be changed to fit your own project.
At this point you can convert your annotated data into TFRecord format.
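Before splitting things into train.record and val.record, it can be worth sanity-checking the generated file. A minimal sketch under TensorFlow 1.x that counts the records in test.record and peeks at one stored field (the path matches the command above):
import tensorflow as tf

record_path = './test.record'

count = 0
for serialized in tf.python_io.tf_record_iterator(record_path):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    if count == 0:
        # Peek at one feature to confirm the record was written as expected.
        filename = example.features.feature['image/filename'].bytes_list.value[0]
        print('first image:', filename.decode('utf8'))
    count += 1
print('{} examples in {}'.format(count, record_path))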
Once train.record and val.record are ready, you can move on to the next step!
The rest will be covered in the follow-up post!