Preface
Algorithmic and engineering skills are both indispensable for an algorithm engineer. I previously introduced DeepLab V1, V2, and V3, but something always felt missing: with only the papers and no source code, it stays armchair theory. So today I will try to analyze these algorithms carefully alongside an actual implementation; once we have worked through all three, we can dissect DeepLabV3+ another time. Since I have recently been reading the PyTorch edition of Dive into Deep Learning, this is a good opportunity to use a PyTorch implementation. All the source code analyzed below comes from this PyTorch project: https://github.com/kazuto1011/deeplab-pytorch/tree/master/libs/models
DeepLab V1 Source Code Analysis
For the algorithmic principles of DeepLab V1, see my earlier post: https://mp.weixin.qq.com/s/rvP8-Y-CRuq4HFzR0qJWcg . The network we dissect today is built on ResNet residual blocks fused with dilated (atrous) convolution: the first layer is an ordinary convolution with stride = 2, followed by a stride = 2 max-pooling, then an ordinary bottleneck stage, a stride = 2 bottleneck stage, and finally bottleneck stages with dilation = 2 and dilation = 4.
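Before reading the model code, it helps to see what dilation buys us. The following is a minimal standalone sketch of my own (not from the repository): a 3x3 kernel with dilation d samples points d pixels apart, so its receptive field grows like a (2d+1)x(2d+1) kernel, while the parameter count and, with padding = dilation at stride 1, the spatial size stay unchanged.

import torch
import torch.nn as nn

x = torch.randn(1, 64, 56, 56)
for d in [1, 2, 4]:
    # padding = dilation keeps a stride-1 3x3 conv size-preserving
    conv = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=d, dilation=d)
    print(d, conv(x).shape)  # torch.Size([1, 64, 56, 56]) for every d

With the receptive field enlarged this way, the network can keep high-resolution feature maps instead of downsampling, which is exactly the trick the dilated ResNet below relies on.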
from __future__ import absolute_import, print_function

import torch
import torch.nn as nn
import torch.nn.functional as F


# Define the DeepLabV1 network structure
class DeepLabV1(nn.Sequential):
    """
    DeepLab v1: Dilated ResNet + 1x1 Conv
    Note that this is just a container for loading the pretrained COCO model
    and not mentioned as "v1" in papers.
    """

    def __init__(self, n_classes, n_blocks):
        super(DeepLabV1, self).__init__()
        ch = [64 * 2 ** p for p in range(6)]
        self.add_module("layer1", _Stem(ch[0]))
        self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1))
        self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1))
        self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2))
        self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4))
        self.add_module("fc", nn.Conv2d(2048, n_classes, 1))
# Pick the BatchNorm implementation: use SyncBatchNorm from the `encoding`
# package if it is installed, otherwise fall back to torch.nn.BatchNorm2d.
try:
    from encoding.nn import SyncBatchNorm

    _BATCH_NORM = SyncBatchNorm
except ImportError:
    _BATCH_NORM = nn.BatchNorm2d

_BOTTLENECK_EXPANSION = 4
# Define a Conv + BN + ReLU building block
class _ConvBnReLU(nn.Sequential):
    """
    Cascade of 2D convolution, batch norm, and ReLU.
    """

    BATCH_NORM = _BATCH_NORM

    def __init__(
        self, in_ch, out_ch, kernel_size, stride, padding, dilation, relu=True
    ):
        super(_ConvBnReLU, self).__init__()
        self.add_module(
            "conv",
            nn.Conv2d(
                in_ch, out_ch, kernel_size, stride, padding, dilation, bias=False
            ),
        )
        self.add_module("bn", _BATCH_NORM(out_ch, eps=1e-5, momentum=0.999))
        if relu:
            self.add_module("relu", nn.ReLU())
# Define the Bottleneck: a 1x1 conv reduces the channel count, a 3x3 conv
# (optionally dilated) follows, a 1x1 conv expands the channels back, and a
# shortcut connection adds the input. The reduction width is set by
# _BOTTLENECK_EXPANSION (mid_ch = out_ch // 4); this is the standard ResNet
# bottleneck.
class _Bottleneck(nn.Module):
    """
    Bottleneck block of MSRA ResNet.
    """

    def __init__(self, in_ch, out_ch, stride, dilation, downsample):
        super(_Bottleneck, self).__init__()
        mid_ch = out_ch // _BOTTLENECK_EXPANSION
        self.reduce = _ConvBnReLU(in_ch, mid_ch, 1, stride, 0, 1, True)
        self.conv3x3 = _ConvBnReLU(mid_ch, mid_ch, 3, 1, dilation, dilation, True)
        self.increase = _ConvBnReLU(mid_ch, out_ch, 1, 1, 0, 1, False)
        self.shortcut = (
            _ConvBnReLU(in_ch, out_ch, 1, stride, 0, 1, False)
            if downsample
            else lambda x: x  # identity
        )

    def forward(self, x):
        h = self.reduce(x)
        h = self.conv3x3(h)
        h = self.increase(h)
        h += self.shortcut(x)
        return F.relu(h)
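A quick shape check of the block above (my own snippet; it assumes _Bottleneck and its helpers from this article are already defined). With out_ch = 256, the block squeezes to mid_ch = 256 // 4 = 64, expands back to 256, and the projection shortcut matches the channel count so the residual addition is valid:

block = _Bottleneck(in_ch=64, out_ch=256, stride=1, dilation=1, downsample=True)
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 56, 56])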
# Define the ResLayer. The body of DeepLabV1 is stacked from ResLayers;
# downsampling happens only in the first Bottleneck of each ResLayer.
class _ResLayer(nn.Sequential):
    """
    Residual layer with multi grids
    """

    def __init__(self, n_layers, in_ch, out_ch, stride, dilation, multi_grids=None):
        super(_ResLayer, self).__init__()
        if multi_grids is None:
            multi_grids = [1 for _ in range(n_layers)]
        else:
            assert n_layers == len(multi_grids)

        # Downsampling is only in the first block
        for i in range(n_layers):
            self.add_module(
                "block{}".format(i + 1),
                _Bottleneck(
                    in_ch=(in_ch if i == 0 else out_ch),
                    out_ch=out_ch,
                    stride=(stride if i == 0 else 1),
                    dilation=dilation * multi_grids[i],
                    downsample=(True if i == 0 else False),
                ),
            )
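DeepLabV1 itself never passes multi_grids (it defaults to all ones), but the hook is here for DeepLabV3's multi-grid trick: block i gets dilation * multi_grids[i] in its 3x3 conv. A hedged sketch of mine, assuming the classes above are defined, that inspects the resulting dilations:

layer = _ResLayer(n_layers=3, in_ch=1024, out_ch=2048, stride=1, dilation=2,
                  multi_grids=[1, 2, 4])
for name, block in layer.named_children():
    print(name, block.conv3x3.conv.dilation)
# block1 (2, 2) / block2 (4, 4) / block3 (8, 8)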
# Before entering the ResLayers, a 7x7 convolution slides over the input image
# to enlarge the receptive field. Its padding (3) keeps the kernel centered,
# but the stride of 2 still halves the resolution; the pooling layer, with
# kernel size 3 and stride 2, halves the feature-map resolution once more.
class _Stem(nn.Sequential):
    """
    The 1st conv layer.
    Note that the max pooling is different from both MSRA and FAIR ResNet.
    """

    def __init__(self, out_ch):
        super(_Stem, self).__init__()
        self.add_module("conv1", _ConvBnReLU(3, out_ch, 7, 2, 3, 1))
        self.add_module("pool", nn.MaxPool2d(3, 2, 1, ceil_mode=True))
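Finally, a usage sketch of my own that follows the constructor signature above (n_blocks = [3, 4, 23, 3] is the ResNet-101 layout; 21 classes matches PASCAL VOC). Since only the stem's conv and pool plus layer3 downsample, a 513x513 input yields 65x65 logits, i.e. an output stride of 8:

if __name__ == "__main__":
    model = DeepLabV1(n_classes=21, n_blocks=[3, 4, 23, 3])
    model.eval()
    image = torch.randn(1, 3, 513, 513)
    print(model(image).shape)  # torch.Size([1, 21, 65, 65]) -> 1/8 of the input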
