DeepLab Semantic Segmentation: Source Code Analysis

Preface

Algorithm skills and engineering skills are both indispensable for an algorithm engineer. I previously introduced DeepLab V1, V2, and V3, but something always felt missing: with only the papers and no source code, it was little more than armchair theorizing. So today I will try to analyze these three algorithms carefully alongside the source code, and once they are clear I may get to DeepLab V3+ when the chance arises. Since I have recently been reading the PyTorch edition of "Dive into Deep Learning", I will use a PyTorch implementation for the analysis. All of the code I analyze comes from this PyTorch project: https://github.com/kazuto1011/deeplab-pytorch/tree/master/libs/models

DeepLab V1 Source Code Analysis

The algorithmic principles of DeepLab V1 are covered in my earlier post: https://mp.weixin.qq.com/s/rvP8-Y-CRuq4HFzR0qJWcg . The network we dissect today fuses dilated (atrous) convolutions into ResNet residual blocks: the first layer is an ordinary convolution with stride = 2, immediately followed by a stride = 2 max-pooling, then an ordinary bottleneck stage, a stride = 2 bottleneck stage, and finally bottleneck stages with dilation = 2 and dilation = 4. (Strictly speaking, the paper's DeepLab v1 is built on VGG-16; as the docstring below notes, this DeepLabV1 class is a dilated-ResNet container for loading the pretrained COCO model and is not what the papers call "v1".)

from __future__ import absolute_import, print_function

import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the DeepLabV1 network structure
class DeepLabV1(nn.Sequential):
    """
    DeepLab v1: Dilated ResNet + 1x1 Conv
    Note that this is just a container for loading the pretrained COCO model and not mentioned as "v1" in papers.
    """

    def __init__(self, n_classes, n_blocks):
        super(DeepLabV1, self).__init__()
        ch = [64 * 2 ** p for p in range(6)]  # [64, 128, 256, 512, 1024, 2048]
        self.add_module("layer1", _Stem(ch[0]))
        self.add_module("layer2", _ResLayer(n_blocks[0], ch[0], ch[2], 1, 1))  # stride 1, dilation 1
        self.add_module("layer3", _ResLayer(n_blocks[1], ch[2], ch[3], 2, 1))  # stride 2, dilation 1
        self.add_module("layer4", _ResLayer(n_blocks[2], ch[3], ch[4], 1, 2))  # stride 1, dilation 2
        self.add_module("layer5", _ResLayer(n_blocks[3], ch[4], ch[5], 1, 4))  # stride 1, dilation 4
        self.add_module("fc", nn.Conv2d(2048, n_classes, 1))

# Use the synchronized BatchNorm from the `encoding` package if it is available;
# otherwise fall back to torch.nn's BatchNorm2d.

try:
    from encoding.nn import SyncBatchNorm

    _BATCH_NORM = SyncBatchNorm
except ImportError:
    _BATCH_NORM = nn.BatchNorm2d

_BOTTLENECK_EXPANSION = 4

# Conv + BN + ReLU building block
class _ConvBnReLU(nn.Sequential):
    """
    Cascade of 2D convolution, batch norm, and ReLU.
    """

    BATCH_NORM = _BATCH_NORM

    def __init__(
            self, in_ch, out_ch, kernel_size, stride, padding, dilation, relu=True
    ):
        super(_ConvBnReLU, self).__init__()
        self.add_module(
            "conv",
            nn.Conv2d(
                in_ch, out_ch, kernel_size, stride, padding, dilation, bias=False
            ),
        )
        self.add_module("bn", _BATCH_NORM(out_ch, eps=1e-5, momentum=0.999))

        if relu:
            self.add_module("relu", nn.ReLU())


# The Bottleneck block: a 1x1 convolution first reduces the channel dimension,
# then a 3x3 (possibly dilated) convolution is applied, a final 1x1 convolution
# expands the channels back, and a shortcut connection is added. The reduction
# factor is _BOTTLENECK_EXPANSION; this is the standard ResNet bottleneck.
class _Bottleneck(nn.Module):
    """
    Bottleneck block of MSRA ResNet.
    """

    def __init__(self, in_ch, out_ch, stride, dilation, downsample):
        super(_Bottleneck, self).__init__()
        mid_ch = out_ch // _BOTTLENECK_EXPANSION
        self.reduce = _ConvBnReLU(in_ch, mid_ch, 1, stride, 0, 1, True)
        self.conv3x3 = _ConvBnReLU(mid_ch, mid_ch, 3, 1, dilation, dilation, True)
        self.increase = _ConvBnReLU(mid_ch, out_ch, 1, 1, 0, 1, False)
        self.shortcut = (
            _ConvBnReLU(in_ch, out_ch, 1, stride, 0, 1, False)
            if downsample
            else lambda x: x  # identity
        )

    def forward(self, x):
        h = self.reduce(x)
        h = self.conv3x3(h)
        h = self.increase(h)
        h += self.shortcut(x)
        return F.relu(h)
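
The shortcut logic is worth spelling out: when downsample=True the residual branch changes the tensor shape (channels and/or stride), so the input goes through a 1x1 projection; otherwise it passes through unchanged. Note also that this implementation puts the stride on the first 1x1 convolution, following the original MSRA ResNet rather than the torchvision variant, which strides the 3x3 convolution. A small illustrative example of my own, with made-up shapes:

# First block of a stage: 256 -> 512 channels and stride 2, so the shortcut
# needs a 1x1 projection (downsample=True).
first = _Bottleneck(in_ch=256, out_ch=512, stride=2, dilation=1, downsample=True)
# Later blocks keep the shape, so the shortcut is a plain identity.
rest = _Bottleneck(in_ch=512, out_ch=512, stride=1, dilation=1, downsample=False)
print(rest(first(torch.randn(1, 256, 64, 64))).shape)  # torch.Size([1, 512, 32, 32])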

# _ResLayer stacks Bottlenecks; the whole DeepLabV1 backbone is built from these
# layers. Downsampling happens only in the first Bottleneck of each _ResLayer.
class _ResLayer(nn.Sequential):
    """
    Residual layer with multi grids
    """

    def __init__(self, n_layers, in_ch, out_ch, stride, dilation, multi_grids=None):
        super(_ResLayer, self).__init__()

        if multi_grids is None:
            multi_grids = [1 for _ in range(n_layers)]
        else:
            assert n_layers == len(multi_grids)

        # Downsampling is only in the first block
        for i in range(n_layers):
            self.add_module(
                "block{}".format(i + 1),
                _Bottleneck(
                    in_ch=(in_ch if i == 0 else out_ch),
                    out_ch=out_ch,
                    stride=(stride if i == 0 else 1),
                    dilation=dilation * multi_grids[i],
                    downsample=(True if i == 0 else False),
                ),
            )
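
The multi_grids argument multiplies the base dilation per block. It is unused in this v1 model (it defaults to all ones), but it is how the same class later implements the multi-grid scheme of DeepLab v3. A hypothetical example:

# dilation=4 with multi_grids=[1, 2, 4] gives per-block dilations 4, 8, 16;
# with the default multi_grids, every block simply uses dilation=4.
layer = _ResLayer(n_layers=3, in_ch=1024, out_ch=2048, stride=1, dilation=4,
                  multi_grids=[1, 2, 4])
for name, block in layer.named_children():
    print(name, block.conv3x3.conv.dilation)
# block1 (4, 4), block2 (8, 8), block3 (16, 16)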

# Before entering the residual layers, a 7x7 convolution slides over the input
# image to enlarge the receptive field; with stride 2 and padding 3 it halves
# the resolution. The pooling layer (kernel 3, stride 2) then halves the
# feature map resolution again.
class _Stem(nn.Sequential):
    """
    The 1st conv layer.
    Note that the max pooling is different from both MSRA and FAIR ResNet.
    """

    def __init__(self, out_ch):
        super(_Stem, self).__init__()
        self.add_module("conv1", _ConvBnReLU(3, out_ch, 7, 2, 3, 1))  # 7x7 conv, stride 2, padding 3
        self.add_module("pool", nn.MaxPool2d(3, 2, 1, ceil_mode=True))
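
Putting the stem together (a quick check of my own, not from the original post): the 7x7 stride-2 convolution followed by the stride-2 max pooling reduces the input resolution by a factor of 4 before any residual layer runs.

stem = _Stem(64)
print(stem(torch.randn(1, 3, 513, 513)).shape)  # torch.Size([1, 64, 129, 129])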