FCN源码解读之voc_layers.py

本文深入解析了FCN中用于图像语义分割的VOCSegDataLayer和SBDDSegDataLayer数据层,详细介绍了这两个层的实现原理和代码解读,包括setup、reshape、forward和backward等关键函数的作用。

转载自 https://blog.youkuaiyun.com/qq_21368481/article/details/80246028

voc_layers.py是FCN中利用python写的数据层(即使用caffe的Python API 写的数据输入层),其格式是相对固定的,包含setup()、reshape()、forward()、backward()四个必要函数。

其源码如下:


   
  1. import caffe
  2. import numpy as np
  3. from PIL import Image
  4. import random
  5. class VOCSegDataLayer(caffe.Layer):
  6. """
  7. Load (input image, label image) pairs from PASCAL VOC
  8. one-at-a-time while reshaping the net to preserve dimensions.
  9. Use this to feed data to a fully convolutional network.
  10. """
  11. def setup(self, bottom, top):
  12. """
  13. Setup data layer according to parameters:
  14. - voc_dir: path to PASCAL VOC year dir
  15. - split: train / val / test
  16. - mean: tuple of mean values to subtract
  17. - randomize: load in random order (default: True)
  18. - seed: seed for randomization (default: None / current time)
  19. for PASCAL VOC semantic segmentation.
  20. example
  21. params = dict(voc_dir="/path/to/PASCAL/VOC2011",
  22. mean=(104.00698793, 116.66876762, 122.67891434),
  23. split="val")
  24. """
  25. # config
  26. params = eval(self.param_str)
  27. self.voc_dir = params[ 'voc_dir']
  28. self.split = params[ 'split']
  29. self.mean = np.array(params[ 'mean'])
  30. self.random = params.get( 'randomize', True)
  31. self.seed = params.get( 'seed', None)
  32. # two tops: data and label
  33. if len(top) != 2:
  34. raise Exception( "Need to define two tops: data and label.")
  35. # data layers have no bottoms
  36. if len(bottom) != 0:
  37. raise Exception( "Do not define a bottom.")
  38. # load indices for images and labels
  39. split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
  40. self.split)
  41. self.indices = open(split_f, 'r').read().splitlines()
  42. self.idx = 0
  43. # make eval deterministic
  44. if 'train' not in self.split:
  45. self.random = False
  46. # randomization: seed and pick
  47. if self.random:
  48. random.seed(self.seed)
  49. self.idx = random.randint( 0, len(self.indices) -1)
  50. def reshape(self, bottom, top):
  51. # load image + label image pair
  52. self.data = self.load_image(self.indices[self.idx])
  53. self.label = self.load_label(self.indices[self.idx])
  54. # reshape tops to fit (leading 1 is for batch dimension)
  55. top[ 0].reshape( 1, *self.data.shape)
  56. top[ 1].reshape( 1, *self.label.shape)
  57. def forward(self, bottom, top):
  58. # assign output
  59. top[ 0].data[...] = self.data
  60. top[ 1].data[...] = self.label
  61. # pick next input
  62. if self.random:
  63. self.idx = random.randint( 0, len(self.indices) -1)
  64. else:
  65. self.idx += 1
  66. if self.idx == len(self.indices):
  67. self.idx = 0
  68. def backward(self, top, propagate_down, bottom):
  69. pass
  70. def load_image(self, idx):
  71. """
  72. Load input image and preprocess for Caffe:
  73. - cast to float
  74. - switch channels RGB -> BGR
  75. - subtract mean
  76. - transpose to channel x height x width order
  77. """
  78. im = Image.open( '{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
  79. in_ = np.array(im, dtype=np.float32)
  80. in_ = in_[:,:,:: -1]
  81. in_ -= self.mean
  82. in_ = in_.transpose(( 2, 0, 1))
  83. return in_
  84. def load_label(self, idx):
  85. """
  86. Load label image as 1 x height x width integer array of label indices.
  87. The leading singleton dimension is required by the loss.
  88. """
  89. im = Image.open( '{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
  90. label = np.array(im, dtype=np.uint8)
  91. label = label[np.newaxis, ...]
  92. return label
  93. class SBDDSegDataLayer(caffe.Layer):
  94. """
  95. Load (input image, label image) pairs from the SBDD extended labeling
  96. of PASCAL VOC for semantic segmentation
  97. one-at-a-time while reshaping the net to preserve dimensions.
  98. Use this to feed data to a fully convolutional network.
  99. """
  100. def setup(self, bottom, top):
  101. """
  102. Setup data layer according to parameters:
  103. - sbdd_dir: path to SBDD `dataset` dir
  104. - split: train / seg11valid
  105. - mean: tuple of mean values to subtract
  106. - randomize: load in random order (default: True)
  107. - seed: seed for randomization (default: None / current time)
  108. for SBDD semantic segmentation.
  109. N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
  110. Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
  111. example
  112. params = dict(sbdd_dir="/path/to/SBDD/dataset",
  113. mean=(104.00698793, 116.66876762, 122.67891434),
  114. split="valid")
  115. """
  116. # config
  117. params = eval(self.param_str)
  118. self.sbdd_dir = params[ 'sbdd_dir']
  119. self.split = params[ 'split']
  120. self.mean = np.array(params[ 'mean'])
  121. self.random = params.get( 'randomize', True)
  122. self.seed = params.get( 'seed', None)
  123. # two tops: data and label
  124. if len(top) != 2:
  125. raise Exception( "Need to define two tops: data and label.")
  126. # data layers have no bottoms
  127. if len(bottom) != 0:
  128. raise Exception( "Do not define a bottom.")
  129. # load indices for images and labels
  130. split_f = '{}/{}.txt'.format(self.sbdd_dir,
  131. self.split)
  132. self.indices = open(split_f, 'r').read().splitlines()
  133. self.idx = 0
  134. # make eval deterministic
  135. if 'train' not in self.split:
  136. self.random = False
  137. # randomization: seed and pick
  138. if self.random:
  139. random.seed(self.seed)
  140. self.idx = random.randint( 0, len(self.indices) -1)
  141. def reshape(self, bottom, top):
  142. # load image + label image pair
  143. self.data = self.load_image(self.indices[self.idx])
  144. self.label = self.load_label(self.indices[self.idx])
  145. # reshape tops to fit (leading 1 is for batch dimension)
  146. top[ 0].reshape( 1, *self.data.shape)
  147. top[ 1].reshape( 1, *self.label.shape)
  148. def forward(self, bottom, top):
  149. # assign output
  150. top[ 0].data[...] = self.data
  151. top[ 1].data[...] = self.label
  152. # pick next input
  153. if self.random:
  154. self.idx = random.randint( 0, len(self.indices) -1)
  155. else:
  156. self.idx += 1
  157. if self.idx == len(self.indices):
  158. self.idx = 0
  159. def backward(self, top, propagate_down, bottom):
  160. pass
  161. def load_image(self, idx):
  162. """
  163. Load input image and preprocess for Caffe:
  164. - cast to float
  165. - switch channels RGB -> BGR
  166. - subtract mean
  167. - transpose to channel x height x width order
  168. """
  169. im = Image.open( '{}/img/{}.jpg'.format(self.sbdd_dir, idx))
  170. in_ = np.array(im, dtype=np.float32)
  171. in_ = in_[:,:,:: -1]
  172. in_ -= self.mean
  173. in_ = in_.transpose(( 2, 0, 1))
  174. return in_
  175. def load_label(self, idx):
  176. """
  177. Load label image as 1 x height x width integer array of label indices.
  178. The leading singleton dimension is required by the loss.
  179. """
  180. import scipy.io
  181. mat = scipy.io.loadmat( '{}/cls/{}.mat'.format(self.sbdd_dir, idx))
  182. label = mat[ 'GTcls'][ 0][ 'Segmentation'][ 0].astype(np.uint8)
  183. label = label[np.newaxis, ...]
  184. return label

详细代码解读如下

1.VOCSegDataLayer类(也即net.py中声明的测试时的输入层pylayer)

此类对应于val.prototxt中的输入层,即:


   
  1. layer {
  2. name: "data"
  3. type: "Python"
  4. top: "data"
  5. top: "label"
  6. python_param {
  7. module: "voc_layers"
  8. layer: "VOCSegDataLayer"
  9. param_str: "{\'voc_dir\': \'../data/VOC2012\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
  10. }
  11. }

具体源码解读如下


   
  1. #测试时用到的定义数据层的VOCSegDataLayer类(对应于测试集或验证集),类中根据caffe提供的python接口定义相
  2. #应的函数,详细可参见https://chrischoy.github.io/research/caffe-python-layer/
  3. class VOCSegDataLayer(caffe.Layer):
  4. """
  5. Load (input image, label image) pairs from PASCAL VOC
  6. one-at-a-time while reshaping the net to preserve dimensions.
  7. Use this to feed data to a fully convolutional network.
  8. """
  9. #setup函数,根据相应参数设置数据层
  10. def setup(self, bottom, top):
  11. """
  12. Setup data layer according to parameters:
  13. - voc_dir: path to PASCAL VOC year dir 测试集或验证集的路径
  14. - split: train / val / test split可以为train/val/test中的任意一者(即也可以看看训练集的效果)
  15. - mean: tuple of mean values to subtract 存储着所要减去的平均值(减去平均值可以加速迭代)
  16. - randomize: load in random order (default: True) 当randomize=True时,开启随机加载图片模式
  17. - seed: seed for randomization (default: None / current time) 随机模式的种子(默认值为None)
  18. for PASCAL VOC semantic segmentation.
  19. example
  20. params = dict(voc_dir="/path/to/PASCAL/VOC2011",
  21. mean=(104.00698793, 116.66876762, 122.67891434),
  22. split="val")
  23. """
  24. # config
  25. params = eval(self.param_str) #读入参数
  26. self.voc_dir = params[ 'voc_dir']
  27. self.split = params[ 'split']
  28. self.mean = np.array(params[ 'mean'])
  29. self.random = params.get( 'randomize', True)
  30. self.seed = params.get( 'seed', None)
  31. # two tops: data and label
  32. #判断输出是否包含数据和标记
  33. if len(top) != 2:
  34. raise Exception( "Need to define two tops: data and label.")
  35. # data layers have no bottoms
  36. #判断是否有输入(数据层不需要定义输入bottom)
  37. if len(bottom) != 0:
  38. raise Exception( "Do not define a bottom.")
  39. # load indices for images and labels
  40. #获取所需要加载的图片的编号(即读取'split'.txt文档中的图片索引编号,这些编号其实是图片名)
  41. split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
  42. self.split) #第一个{}即self.voc_dir;第二个{}即self.split
  43. #splitlines(),按行('\r', '\r\n', '\n')分隔,返回一个包含各行作为元素的列表
  44. #即indices是所有图片编号的列表(按行存放成一列)
  45. self.indices = open(split_f, 'r').read().splitlines()
  46. self.idx = 0 #indices列表索引指针idx初始化为0
  47. # make eval deterministic
  48. #当split=test或者val时,不需要开启随机模式
  49. if 'train' not in self.split:
  50. self.random = False
  51. # randomization: seed and pick
  52. #判断是否开启随机读取图片模式
  53. if self.random:
  54. random.seed(self.seed)
  55. #随机生成一个整数作为索引号idx(范围为0~(len(self.indices)-1))
  56. self.idx = random.randint( 0, len(self.indices) -1)
  57. #reshape函数,根据索引号idx加载相应图片,并调整数据层的大小
  58. def reshape(self, bottom, top):
  59. # load image + label image pair
  60. #load_image()和load_label()函数在后面定义
  61. self.data = self.load_image(self.indices[self.idx])
  62. self.label = self.load_label(self.indices[self.idx])
  63. # reshape tops to fit (leading 1 is for batch dimension)
  64. '''
  65. 重新调整数据层的大小(即caffe所加载的数据层的大小在每次迭代训练中是可以变的,
  66. 因为数据层的大小并不影响各层参数的大小)
  67. caffe中的数据是按N*C*H*W存储的,N为batch size,C为通道数,H和W分别为高和宽,这里的1即为batch size
  68. 也就对应了FCN论文中所讲到的采用SGD算法(随机梯度下降法,每一次迭代训练的图片数为1)
  69. '''
  70. top[ 0].reshape( 1, *self.data.shape) #data
  71. top[ 1].reshape( 1, *self.label.shape) #label
  72. #定义前向传播函数forward(),数据层的前向传播不对数据进行任何操作,只是简单的输出数据本身
  73. def forward(self, bottom, top):
  74. # assign output
  75. top[ 0].data[...] = self.data
  76. top[ 1].data[...] = self.label
  77. #输出数据的同时,进行下一次迭代时所需要的图片的选择(即产生下一个索引号idx)
  78. # pick next input
  79. if self.random:
  80. self.idx = random.randint( 0, len(self.indices) -1)
  81. else:
  82. self.idx += 1
  83. if self.idx == len(self.indices):
  84. self.idx = 0
  85. #数据层不需要后向传播,直接pass
  86. def backward(self, top, propagate_down, bottom):
  87. pass
  88. #加载图片的函数(根据索引号idx进行加载)
  89. def load_image(self, idx):
  90. """
  91. Load input image and preprocess for Caffe: 加载图片并处理成caffe的数据格式
  92. - cast to float 转换为float型
  93. - switch channels RGB -> BGR 交换通道位置,即R通道和B通道交换(感觉是用了opencv库的原因)
  94. - subtract mean 减去均值
  95. - transpose to channel x height x width order 将通道数放在前面(对应caffe数据存储的格式)
  96. """
  97. im = Image.open( '{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
  98. in_ = np.array(im, dtype=np.float32)
  99. in_ = in_[:,:,:: -1] #::-1表示将最后一维(通道维)按逆序读取,即交换R通道和B通道(RGB -> BGR)
  100. in_ -= self.mean #减去均值
  101. in_ = in_.transpose(( 2, 0, 1)) #将通道数放在前面
  102. return in_

python中的PIL所读取的三通道彩色图片是按H*W*C存放的,且三通道的顺序是标准的RGB顺序,输入到caffe中处理前,需要进行相应的转换。

caffe中的数据存储方式是N*C*H*W,且是按BGR顺序存放三通道的。

所以需要先进行RGB到BGR的转换,具体可直接使用 in_ = in_[:,:,::-1]语句实现,具体理解可参见以下例子(其中a的第三维可看成是C,且按RGB顺序存放,前两维可看成是H和W):


   
  1. import numpy as np
  2. a = np.array([[[ 1, 2, 3],[ 4, 5, 6]],[[ 7, 8, 9],[ 10, 11, 12]],[[ 13, 14, 15],[ 16, 17, 18]]])
  3. print(str(a.shape))
  4. print(str(a))
  5. a = a[:,:,:: -1]
  6. #a = a.transpose((2,0,1))
  7. print(str(a.shape))
  8. print(str(a))

以这个例子来说,第一行第一列所在位置的像素点的像素值分别为:R=1,G=2,B=3

运行结果为(可以看出第一行第一列所在位置的像素点的像素值分别为:B=3,G=2,R=1):


   
  1. ( 3L, 2L, 3L)
  2. [[[ 1 2 3]
  3. [ 4 5 6]]
  4. [[ 7 8 9]
  5. [ 10 11 12]]
  6. [[ 13 14 15]
  7. [ 16 17 18]]]
  8. ( 3L, 2L, 3L)
  9. [[[ 3 2 1]
  10. [ 6 5 4]]
  11. [[ 9 8 7]
  12. [ 12 11 10]]
  13. [[ 15 14 13]
  14. [ 18 17 16]]]

在此基础上还需要减去各个通道的均值,即进行均值归一化,以加快训练的收敛速度。

最后按照caffe存储数据的格式将通道维放在最前面,即利用numpy的transpose()函数进行转置操作,具体理解参见以下例子:


   
  1. import numpy as np
  2. a = np.array([[[ 1, 2, 3],[ 4, 5, 6]],[[ 7, 8, 9],[ 10, 11, 12]],[[ 13, 14, 15],[ 16, 17, 18]]])
  3. print(str(a.shape))
  4. print(str(a))
  5. a = a[:,:,:: -1]
  6. a = a.transpose(( 2, 0, 1))
  7. print(str(a.shape))
  8. print(str(a))

运行结果如下(由此可看出原来的第三维变到了第一维):


   
  1. ( 3L, 2L, 3L)
  2. [[[ 1 2 3]
  3. [ 4 5 6]]
  4. [[ 7 8 9]
  5. [ 10 11 12]]
  6. [[ 13 14 15]
  7. [ 16 17 18]]]
  8. ( 3L, 3L, 2L)
  9. [[[ 3 6]
  10. [ 9 12]
  11. [ 15 18]]
  12. [[ 2 5]
  13. [ 8 11]
  14. [ 14 17]]
  15. [[ 1 4]
  16. [ 7 10]
  17. [ 13 16]]]

即,例如结果中的


   
  1. [[ 3 6]
  2. [ 9 12]
  3. [ 15 18]]

表示的是所有像素点的B通道的像素数值,也即表示的是原图像的B通道。


   
  1. #加载标记的函数(按照索引号idx加载相应的label图片)
  2. def load_label(self, idx):
  3. """
  4. Load label image as 1 x height x width integer array of label indices.
  5. The leading singleton dimension is required by the loss.
  6. """
  7. im = Image.open( '{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
  8. label = np.array(im, dtype=np.uint8) #标签是单通道的
  9. #np.newaxis的功能是插入新维度,即将原来的H×W转换为1×H×W
  10. label = label[np.newaxis, ...]
  11. return label
2.SBDDSegDataLayer类(也即net.py中声明的训练时的输入层pylayer)

此类对应于train.prototxt中的输入层,即:


   
  1. layer {
  2. name: "data"
  3. type: "Python"
  4. top: "data"
  5. top: "label"
  6. python_param {
  7. module: "voc_layers"
  8. layer: "SBDDSegDataLayer"
  9. param_str: "{\'sbdd_dir\': \'../data/VOC2012\', \'seed\': 1337, \'split\': \'train\', \'mean\': (104.00699, 116.66877, 122.67892)}"
  10. }
  11. }

SBDDSegDataLayer类的代码和VOCSegDataLayer类类似,在此不再重复解读,就其中的一小点进行说明。


   
  1. #训练时用到的定义数据层的SBDDSegDataLayer类(对应于训练集),类中根据caffe提供的python接口定义相
  2. #应的函数
  3. class SBDDSegDataLayer(caffe.Layer):
  4. """
  5. Load (input image, label image) pairs from the SBDD extended labeling
  6. of PASCAL VOC for semantic segmentation
  7. one-at-a-time while reshaping the net to preserve dimensions.
  8. Use this to feed data to a fully convolutional network.
  9. """
  10. def setup(self, bottom, top):
  11. """
  12. Setup data layer according to parameters:
  13. - sbdd_dir: path to SBDD `dataset` dir
  14. - split: train / seg11valid
  15. - mean: tuple of mean values to subtract
  16. - randomize: load in random order (default: True)
  17. - seed: seed for randomization (default: None / current time)
  18. for SBDD semantic segmentation.
  19. N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
  20. Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
  21. example
  22. params = dict(sbdd_dir="/path/to/SBDD/dataset",
  23. mean=(104.00698793, 116.66876762, 122.67891434),
  24. split="valid")
  25. """
  26. # config
  27. params = eval(self.param_str)
  28. self.sbdd_dir = params[ 'sbdd_dir']
  29. self.split = params[ 'split']
  30. self.mean = np.array(params[ 'mean'])
  31. self.random = params.get( 'randomize', True)
  32. self.seed = params.get( 'seed', None)
  33. # two tops: data and label
  34. if len(top) != 2:
  35. raise Exception( "Need to define two tops: data and label.")
  36. # data layers have no bottoms
  37. if len(bottom) != 0:
  38. raise Exception( "Do not define a bottom.")
  39. # load indices for images and labels
  40. split_f = '{}/{}.txt'.format(self.sbdd_dir,
  41. self.split)
  42. self.indices = open(split_f, 'r').read().splitlines()
  43. self.idx = 0
  44. # make eval deterministic
  45. if 'train' not in self.split:
  46. self.random = False
  47. # randomization: seed and pick
  48. if self.random:
  49. random.seed(self.seed)
  50. self.idx = random.randint( 0, len(self.indices) -1)
  51. def reshape(self, bottom, top):
  52. # load image + label image pair
  53. self.data = self.load_image(self.indices[self.idx])
  54. self.label = self.load_label(self.indices[self.idx])
  55. # reshape tops to fit (leading 1 is for batch dimension)
  56. top[ 0].reshape( 1, *self.data.shape)
  57. top[ 1].reshape( 1, *self.label.shape)
  58. def forward(self, bottom, top):
  59. # assign output
  60. top[ 0].data[...] = self.data
  61. top[ 1].data[...] = self.label
  62. # pick next input
  63. if self.random:
  64. self.idx = random.randint( 0, len(self.indices) -1)
  65. else:
  66. self.idx += 1
  67. if self.idx == len(self.indices):
  68. self.idx = 0
  69. def backward(self, top, propagate_down, bottom):
  70. pass
  71. def load_image(self, idx):
  72. """
  73. Load input image and preprocess for Caffe:
  74. - cast to float
  75. - switch channels RGB -> BGR
  76. - subtract mean
  77. - transpose to channel x height x width order
  78. """
  79. im = Image.open( '{}/img/{}.jpg'.format(self.sbdd_dir, idx))
  80. in_ = np.array(im, dtype=np.float32)
  81. in_ = in_[:,:,:: -1]
  82. in_ -= self.mean
  83. in_ = in_.transpose(( 2, 0, 1))
  84. return in_
  85. def load_label(self, idx):
  86. """
  87. Load label image as 1 x height x width integer array of label indices.
  88. The leading singleton dimension is required by the loss.
  89. """
  90. import scipy.io
  91. mat = scipy.io.loadmat( '{}/cls/{}.mat'.format(self.sbdd_dir, idx)) #训练集的标签为.mat格式
  92. label = mat[ 'GTcls'][ 0][ 'Segmentation'][ 0].astype(np.uint8)
  93. label = label[np.newaxis, ...]
  94. return label

SBDDSegDataLayer类所加载的训练样本的标记图片是按Matlab的.mat格式存储的,但实际使用时,我们没有必要按照.mat格式来加载标记图片,可参照VOCSegDataLayer类直接读取.png或.jpg格式的标记图片(建议使用无损的.png格式,因为.jpg的有损压缩可能改变标签的类别索引值),即可将这个load_label()函数修改为:


   
  1. def load_label(self, idx):
  2. """
  3. Load label image as 1 x height x width integer array of label indices.
  4. The leading singleton dimension is required by the loss.
  5. """
  6. im = Image.open( '{}/SegmentationClass/{}.png'.format(self.sbdd_dir, idx))
  7.         label = np.array(im, dtype=np.uint8)
  8.         label = label[np.newaxis, ...]
  9. return label




评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值