转载自 https://blog.youkuaiyun.com/qq_21368481/article/details/80246028
voc_layers.py是FCN中利用python写的数据层(即使用caffe的Python API 写的数据输入层),其格式是相对固定的,包含setup()、reshape()、forward()、backward()四个必要函数。
其源码如下:
-
# Module dependencies for the data layers below.
# `random` drives the optional random sampling order; `caffe` provides the
# Python layer base class; `numpy` and `PIL.Image` are used to load and
# preprocess images and labels.
import random

import caffe
import numpy as np
from PIL import Image
-
-
class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises Exception if the layer is not wired with exactly two tops and
        no bottoms, or if the split file lists no images.
        """
        # config
        # NOTE(review): eval() executes param_str as Python code, so the net
        # definition that supplies it must come from a trusted source.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # context manager so the split file handle is closed right away
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        # fail early with a clear message instead of a later IndexError /
        # ValueError when there is nothing to load
        if not self.indices:
            raise Exception("No image indices found in {}".format(split_f))
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            # randint is inclusive on both ends
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then choose the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer has no bottoms to propagate gradients to."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        # reverse the last (channel) axis
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        # prepend a singleton axis
        label = label[np.newaxis, ...]
        return label
-
-
-
class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises Exception if the layer is not wired with exactly two tops and
        no bottoms, or if the split file lists no images.
        """
        # config
        # NOTE(review): eval() executes param_str as Python code, so the net
        # definition that supplies it must come from a trusted source.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir,
                                     self.split)
        # context manager so the split file handle is closed right away
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        # fail early with a clear message instead of a later IndexError /
        # ValueError when there is nothing to load
        if not self.indices:
            raise Exception("No image indices found in {}".format(split_f))
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            # randint is inclusive on both ends
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then choose the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer has no bottoms to propagate gradients to."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        # reverse the last (channel) axis
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        # prepend a singleton axis
        label = label[np.newaxis, ...]
        return label
详细代码解读如下
1.VOCSegDataLayer类(也即net.py中声明的测试时的输入层pylayer)
此类对应于val.prototxt中的输入层,即:
-
layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "voc_layers"
    layer: "VOCSegDataLayer"
    param_str: "{\'voc_dir\': \'../data/VOC2012\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
  }
}
具体源码解读如下
-
# Data layer class used at test time (for the test or validation set). Its
# methods follow the Python layer interface provided by Caffe; for details see
# https://chrischoy.github.io/research/caffe-python-layer/
class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    # setup(): configure the data layer from its parameters
    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
          (path of the test or validation data)
        - split: train / val / test
          (any of the three may be used, e.g. to inspect results on the
          training set)
        - mean: tuple of mean values to subtract
          (the article notes that mean subtraction can speed up training)
        - randomize: load in random order (default: True)
          (random image loading is enabled when randomize is True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises Exception if the layer is not wired with exactly two tops and
        no bottoms, or if the split file lists no images.
        """
        # config: read the parameters
        # NOTE(review): eval() executes param_str as Python code, so the net
        # definition that supplies it must come from a trusted source.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        # (check that the outputs are exactly the data and the label)
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        # (a data layer must not be given any input)
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        # (read the image identifiers listed in '<split>.txt'; these
        # identifiers are in fact the image file names)
        # first {} is self.voc_dir, second {} is self.split
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # splitlines() splits on line boundaries ('\r', '\r\n', '\n') and
        # returns the lines as a list, so indices is the list of all image
        # identifiers, one per line of the file.
        # The context manager closes the file handle right away.
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        # fail early with a clear message instead of a later IndexError /
        # ValueError when there is nothing to load
        if not self.indices:
            raise Exception("No image indices found in {}".format(split_f))
        self.idx = 0  # position in indices, initialised to 0

        # make eval deterministic
        # (any split whose name does not contain 'train', e.g. test or val,
        # is read sequentially)
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        # (only when random loading is enabled)
        if self.random:
            random.seed(self.seed)
            # draw a random integer in 0 .. len(self.indices) - 1 (inclusive)
            # as the position idx
            self.idx = random.randint(0, len(self.indices) - 1)

    # reshape(): load the pair selected by idx and resize the tops to match
    def reshape(self, bottom, top):
        # load image + label image pair
        # (load_image() is defined below; load_label() is shown separately
        # later in this article)
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        #
        # The size of the data layer may change from one iteration to the
        # next, because it does not affect the size of the parameters of the
        # other layers. Caffe stores data as N x C x H x W, where N is the
        # batch size, C the number of channels, and H and W the height and
        # width; the 1 here is the batch size. The article relates this to
        # the SGD setting of the FCN paper, with one image per iteration.
        top[0].reshape(1, *self.data.shape)  # data
        top[1].reshape(1, *self.label.shape)  # label

    # forward(): a data layer performs no computation in the forward pass;
    # it simply outputs the loaded data
    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        # (while emitting the data, choose the image for the next iteration,
        # i.e. produce the next idx)
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry
            if self.idx == len(self.indices):
                self.idx = 0

    # a data layer needs no backward pass, so simply pass
    def backward(self, top, propagate_down, bottom):
        pass

    # image-loading function (loads the image for the given index entry)
    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
          (swap the R and B channels; the article's author guesses this is
          related to the use of OpenCV)
        - subtract mean
        - transpose to channel x height x width order
          (channels first, matching Caffe's data layout)
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        # a step of -1 reads the last axis backwards, i.e. swaps R and B
        in_ = in_[:, :, ::-1]
        in_ -= self.mean  # subtract the mean
        in_ = in_.transpose((2, 0, 1))  # move the channel axis to the front
        return in_
Python中PIL读取的三通道彩色图片(转换为numpy数组后)是按H*W*C存放的,且三通道的顺序是标准的RGB顺序,因此在输入到caffe中处理之前,需要进行相应的转换。
caffe中的数据存储方式是N*C*H*W,且是按BGR顺序存放三通道的。
所以需要先将RGB转换为BGR,可直接使用 in_ = in_[:,:,::-1] 语句实现,具体理解可参见以下例子(其中a的第三维可看成是C,且按RGB顺序存放,前两维可看成是H和W):
-
# Demo: reversing the last axis of an H x W x C array swaps the channel order
# (RGB -> BGR) while leaving the shape unchanged.
import numpy as np

# 3 x 2 x 3 array; read the last axis as the channels (R, G, B) of each pixel
a = np.array([[[1, 2, 3], [4, 5, 6]],
              [[7, 8, 9], [10, 11, 12]],
              [[13, 14, 15], [16, 17, 18]]])
print(a.shape)
print(a)

# a step of -1 walks the last axis backwards: (R, G, B) -> (B, G, R)
a = a[:, :, ::-1]
# The transpose is deliberately left disabled in this first demo:
# a = a.transpose((2, 0, 1))
print(a.shape)
print(a)
以这个例子来说,第一行第一列所在位置的像素点的像素值分别为:R=1,G=2,B=3
运行结果为(可以看出第一行第一列所在位置的像素点的像素值分别为:B=3,G=2,R=1):
-
(3L, 2L, 3L)
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[13 14 15]
  [16 17 18]]]
(3L, 2L, 3L)
[[[ 3  2  1]
  [ 6  5  4]]

 [[ 9  8  7]
  [12 11 10]]

 [[15 14 13]
  [18 17 16]]]
在此基础上还需要减去各个通道的均值,即进行均值归一化(零中心化),以加快训练的收敛速度。
最后按照caffe的存储数据的格式将通道数放在前面,即利用python中的transpose()函数进行转置操作,具体理解参见以下例子:
-
# Demo: after swapping the channel order, transpose((2, 0, 1)) moves the
# channel axis to the front, turning H x W x C into C x H x W.
import numpy as np

# 3 x 2 x 3 array; read the last axis as the channels (R, G, B) of each pixel
a = np.array([[[1, 2, 3], [4, 5, 6]],
              [[7, 8, 9], [10, 11, 12]],
              [[13, 14, 15], [16, 17, 18]]])
print(a.shape)
print(a)

# a step of -1 walks the last axis backwards: (R, G, B) -> (B, G, R)
a = a[:, :, ::-1]
# new axis order: old axis 2 (channels) first, then old axes 0 and 1
a = a.transpose((2, 0, 1))
print(a.shape)
print(a)
运行结果如下(由此可看出原来的第三维变到了第一维):
-
(3L, 2L, 3L)
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[13 14 15]
  [16 17 18]]]
(3L, 3L, 2L)
[[[ 3  6]
  [ 9 12]
  [15 18]]

 [[ 2  5]
  [ 8 11]
  [14 17]]

 [[ 1  4]
  [ 7 10]
  [13 16]]]
即,例如结果中的
-
[[ 3  6]
 [ 9 12]
 [15 18]]
表示的是所有像素点的B通道的像素数值,也即表示的是原图像的B通道。
-
# Label-loading function (a method of VOCSegDataLayer, shown here on its own):
# loads the label image for the given index entry.
def load_label(self, idx):
    """
    Load label image as 1 x height x width integer array of label indices.
    The leading singleton dimension is required by the loss.
    """
    im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
    # the article notes that the label image is single-channel
    label = np.array(im, dtype=np.uint8)
    # np.newaxis inserts a new axis, turning H x W into 1 x H x W
    label = label[np.newaxis, ...]
    return label
2.SBDDSegDataLayer类(也即net.py中声明的训练时的输入层pylayer)
此类对应于train.prototxt中的输入层,即:
-
layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "voc_layers"
    layer: "SBDDSegDataLayer"
    param_str: "{\'sbdd_dir\': \'../data/VOC2012\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
  }
}
SBDDSegDataLayer类的代码和VOCSegDataLayer类类似,在此不再重复解读,就其中的一小点进行说明。
-
# Data layer class used at training time (for the training set). Its methods
# follow the Python layer interface provided by Caffe.
class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises Exception if the layer is not wired with exactly two tops and
        no bottoms, or if the split file lists no images.
        """
        # config
        # NOTE(review): eval() executes param_str as Python code, so the net
        # definition that supplies it must come from a trusted source.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir,
                                     self.split)
        # context manager so the split file handle is closed right away
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        # fail early with a clear message instead of a later IndexError /
        # ValueError when there is nothing to load
        if not self.indices:
            raise Exception("No image indices found in {}".format(split_f))
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            # randint is inclusive on both ends
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and resize both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then choose the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around after the last entry
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer has no bottoms to propagate gradients to."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        # reverse the last (channel) axis
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        # the training-set labels are stored in .mat format
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        # prepend a singleton axis
        label = label[np.newaxis, ...]
        return label
SBDDSegDataLayer类所加载的训练样本的标记图片是按Matlab的mat进行存储的,但实际使用时,我们没有必要按照mat格式来加载标记图片,可参见VOCSegDataLayer类直接读取.png或.jpg格式的标记图片,即可将这个load_label()函数修改为:
-
def load_label(self, idx):
    """
    Load label image as 1 x height x width integer array of label indices.
    The leading singleton dimension is required by the loss.

    Replacement for SBDDSegDataLayer.load_label that reads a .png label
    from the SegmentationClass directory under sbdd_dir instead of a
    .mat file.
    """
    im = Image.open('{}/SegmentationClass/{}.png'.format(self.sbdd_dir, idx))
    label = np.array(im, dtype=np.uint8)
    # prepend a singleton axis
    label = label[np.newaxis, ...]
    return label