基于ACNet构建ResNet50backbone的Pytorch代码实现

最新推荐文章于 2024-09-04 23:35:45 发布

practical_sharp

最新推荐文章于 2024-09-04 23:35:45 发布

阅读量4.6k

点赞数 4

CC 4.0 BY-SA版权

分类专栏：深度学习文章标签： ResNet

本文链接：https://blog.youkuaiyun.com/practical_sharp/article/details/114932462

深度学习专栏收录该内容

21 篇文章

订阅专栏

本文介绍ACNet非对称卷积技术，并探讨其在ResNet50骨干网络中的应用，通过引入1x3及3x1卷积核，增强网络性能并提高分类准确性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

ACNet是什么？

[1] X. Ding, Y. Guo, G. Ding, et al., “ACNet: Strengthening the Kernel Skeletons for Powerful CNN via Asymmetric Convolution Blocks,” in ICCV 2019.

在这里插入图片描述

Overview of ACNet. For example, we replace every 3 × 3 layer with an ACB comprising three layers with 3 × 3, 1×3 and 3×1 kernels, respectively, and their outputs are summed up. When the training is completed, we convert the model back into the same structure as the original by adding the asymmetric kernels in each ACB onto the skeleton, which is the crisscross part of the square kernel, as marked on the figure. In practice, this conversion is implemented by building a new model with the original structure and using the converted learned parameters of the ACNet to initialize it.

总结来说，ACNet就是非对称卷积，能够学习到更多特征；以ACNet构建的backbone能提高网络在CIFAR-10、CIFAR-100和ImageNet上的分类性能。

接下来我准备使用ACNet构造ResNet50的backbone来训练目标检测模型，测试一下模型性能能否得到提升。

import torch
from torch import nn as nn
# 去掉因为3x3卷积的padding多出来的行或者列
class CropLayer(nn.Module):
    """Crop the same number of rows/columns from both ends of dims 2 and 3.

    ``crop_set`` is a pair of non-positive ints ``(rows, cols)``; its negation
    is the number of rows / columns removed from *each* side.
    E.g. ``(-1, 0)`` drops the first and last rows of the feature map and
    ``(0, -1)`` drops the first and last columns.

    Raises:
        AssertionError: if either entry of ``crop_set`` is positive.
    """

    def __init__(self, crop_set):
        super(CropLayer, self).__init__()
        self.rows_to_crop = - crop_set[0]
        self.cols_to_crop = - crop_set[1]
        assert self.rows_to_crop >= 0
        assert self.cols_to_crop >= 0

    def forward(self, input):
        rows, cols = self.rows_to_crop, self.cols_to_crop
        height, width = input.shape[2], input.shape[3]
        # Compute the end index from the actual size instead of using ``-rows``:
        # a crop amount of 0 would otherwise give the slice ``0:-0`` == ``0:0``
        # and silently return an empty tensor along that axis.
        return input[:, :, rows:height - rows, cols:width - cols]

# 论文提出的3x3+1x3+3x1，其中3*3卷积还是由self.conv2来实现
# 实现一个AC卷积块只包含1*3和3*1这两个不对称卷积，这两个部分是没有与预训练权重的
# 使用这个模块的时候 padding必须先定义为 = 1
class ACBlock(nn.Module):
    """Asymmetric-convolution branches (3x1 and 1x3) of an ACNet block.

    In training form (``deploy=False``) the block owns a vertical (3, 1) conv
    and a horizontal (1, 3) conv, each followed by its own BatchNorm2d, and
    ``forward`` returns the sum of the two normalised outputs.  The square
    branch is not part of this block (see the ACBlock comment above the class).

    In ``deploy=True`` form the block is a single
    ``kernel_size x kernel_size`` convolution with bias (``fused_conv``).

    ``kernel_size`` only sizes the fused conv and the padding offset; the
    asymmetric kernels are fixed at (3, 1) and (1, 3).  With ``padding`` at
    least ``kernel_size // 2`` each branch pads along its own long axis plus
    the surplus; with less padding the input is cropped by ``CropLayer``
    before an unpadded convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False):
        super(ACBlock, self).__init__()
        self.deploy = deploy

        if deploy:
            # Inference form: one square convolution, nothing else to build.
            self.fused_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                        kernel_size=(kernel_size, kernel_size), stride=stride,
                                        padding=padding, dilation=dilation, groups=groups,
                                        bias=True, padding_mode=padding_mode)
            return

        # How far the requested padding exceeds (>= 0) or falls short of (< 0)
        # the "same" padding of the square kernel.
        offset = padding - kernel_size // 2
        ver_spec = (offset + 1, offset)
        hor_spec = (offset, offset + 1)
        needs_crop = offset < 0

        # Negative entries mean "crop the input"; non-negative ones are used
        # directly as the conv padding.
        self.ver_conv_crop_layer = CropLayer(crop_set=ver_spec) if needs_crop else nn.Identity()
        self.hor_conv_crop_layer = CropLayer(crop_set=hor_spec) if needs_crop else nn.Identity()
        ver_padding = (0, 0) if needs_crop else ver_spec
        hor_padding = (0, 0) if needs_crop else hor_spec

        # Arguments shared by both asymmetric convolutions.
        shared = dict(in_channels=in_channels, out_channels=out_channels, stride=stride,
                      dilation=dilation, groups=groups, bias=False, padding_mode=padding_mode)
        self.ver_conv = nn.Conv2d(kernel_size=(3, 1), padding=ver_padding, **shared)
        self.hor_conv = nn.Conv2d(kernel_size=(1, 3), padding=hor_padding, **shared)
        self.ver_bn = nn.BatchNorm2d(num_features=out_channels)
        self.hor_bn = nn.BatchNorm2d(num_features=out_channels)

    def forward(self, input):
        """Return the fused conv output, or the sum of the two BN'd branches."""
        if self.deploy:
            return self.fused_conv(input)

        vertical = self.ver_bn(self.ver_conv(self.ver_conv_crop_layer(input)))
        horizontal = self.hor_bn(self.hor_conv(self.hor_conv_crop_layer(input)))
        return vertical + horizontal

# ResNet bottleneck block: the 3*3 conv is kept and the ACBlock (1*3 and 3*1 branches) is added alongside it
class Bottleneck(nn.Module):
    """ResNet bottleneck whose 3x3 stage is augmented with an ACBlock.

    Layout: 1x1 conv (squeeze) -> [3x3 conv + ACBlock, summed] -> 1x1 conv
    (expand by ``expansion``), with a residual connection around the whole
    block.  The plain 3x3 conv (``conv2``) is kept as its own module and the
    ACBlock output is added to it before ``bn2``.

    Args:
        in_channel: channels of the block input.
        out_channel: channels of the squeezed middle stage; the block outputs
            ``out_channel * expansion`` channels.
        stride: stride of the 3x3 stage (applied to both ``conv2`` and the ACBlock).
        downsample: optional module applied to the input to produce the
            residual; ``None`` means the input is used as-is.
        norm_layer: normalisation layer factory; defaults to ``nn.BatchNorm2d``.
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):
        super(Bottleneck, self).__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded_channel = out_channel * self.expansion

        # Stage 1: 1x1 conv squeezes the channels (no ACBlock needed).
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(out_channel)

        # Stage 2: square 3x3 conv plus the asymmetric 1x3 / 3x1 branches.
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.ACBlock = ACBlock(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, dilation=1,
                               groups=1, padding_mode='zeros', deploy=False)
        self.bn2 = make_norm(out_channel)

        # Stage 3: 1x1 conv expands the channels (no ACBlock needed).
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=expanded_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(expanded_channel)

        self.relu = nn.ReLU(inplace=True)
        # Supplied by the caller when the residual must change shape.
        self.downsample = downsample

    def forward(self, x):
        """Run the three stages and add the (optionally downsampled) input."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))

        # Square branch + asymmetric branches, normalised together by bn2.
        y = self.conv2(y) + self.ACBlock(y)
        y = self.relu(self.bn2(y))

        y = self.bn3(self.conv3(y))

        y += shortcut
        return self.relu(y)


# Demo: build one training-form ACBlock and print its module structure.
# ``x`` is a sample-sized input tensor; it is not passed to the block here.
x = torch.Tensor(8, 3, 256, 256)
ac = ACBlock(
    in_channels=3,
    out_channels=64,
    kernel_size=3,
    stride=1,
    padding=1,
    dilation=1,
    groups=1,
    padding_mode='zeros',
    deploy=False,
)
print(ac)

ACBlock的原本结构

在这里插入图片描述

但是为了方便加载预训练权重，还是保留了Bottleneck的conv2（3*3卷积），所以ACBlock只剩下了1*3和3*1这两个非对称卷积分支，这两个分支没有预训练权重。

在这里插入图片描述

如何用ACBlock构建ResNet50，使用ACBlock这个类代替普通的3*3的卷积就好了

## 接上面的代码
## 完全不需要改变
class ResNet(nn.Module):
    """ResNet backbone assembled from a caller-supplied residual block class.

    Args:
        block: block factory called as
            ``block(in_channel, channel, downsample=..., stride=..., norm_layer=...)``;
            must expose an ``expansion`` attribute.
        blocks_num: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer (only with ``include_top``).
        include_top: when True, append global average pooling and a linear
            classifier; set False to use the network as a feature extractor
            (e.g. for object detection).
        norm_layer: normalisation layer factory; defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):
        super(ResNet, self).__init__()

        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self.include_top = include_top
        # Running channel count of the tensor entering the next block.
        self.in_channel = 64

        # Stem: 3-channel image -> 64 channels; the stride-2 conv and the
        # stride-2 max-pool each halve the spatial size.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = self._norm_layer(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages; every stage after the first downsamples by 2.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (width, stage_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, blocks_num[index], stride=stage_stride)
            setattr(self, 'layer{}'.format(index + 1), stage)

        # Classification head; skipped when used as a detection backbone.
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution in the network.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.in_channel``."""
        make_norm = self._norm_layer
        stage_out = channel * block.expansion

        # The first block needs a projection shortcut whenever the spatial
        # size or the channel count changes across it.
        shortcut = None
        if not (stride == 1 and self.in_channel == stage_out):
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_out, kernel_size=1, stride=stride, bias=False),
                make_norm(stage_out))

        first = block(self.in_channel, channel, downsample=shortcut,
                      stride=stride, norm_layer=make_norm)
        self.in_channel = stage_out

        # Remaining blocks (index 1 onwards) keep shape, so no shortcut/stride.
        rest = [block(self.in_channel, channel, norm_layer=make_norm)
                for _ in range(1, block_num)]

        return nn.Sequential(first, *rest)

    def forward(self, x):
        """Run the stem and the four stages, plus the classifier if enabled.

        Shape example carried over from the original comments, for an input of
        (8, 3, 256, 256) and a block with expansion 4:
        conv1 -> (8, 64, 128, 128), maxpool -> (8, 64, 64, 64),
        layer1 -> (8, 256, 64, 64), layer2 -> (8, 512, 32, 32),
        layer3 -> (8, 1024, 16, 16), layer4 -> (8, 2048, 8, 8).
        """
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for step in pipeline:
            x = step(x)

        # Object-detection use skips this head.
        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x



"""
x = torch.Tensor(8,3,256,256)
net  = ResNet(Bottleneck,[3,4,6,3],include_top=False)
print(net)
out = net(x)
print(out.size())
"""