CRNN
backbone:
主干网络,作用是提取图片中的特征信息,供后面的网络使用
neck:
位于backbone和head之间,用于更好地利用backbone提取的特征
head:
产生网络最终输出的部分:head利用之前提取的特征做出预测
Backbone:ResNet 34
class ResNet(nn.Layer):
def __init__(self, block, layers, in_channels=3, dcn=None):
    """Build the ResNet stem and its four residual stages.

    Args:
        block: Residual block class used for every stage. It must expose an
            ``expansion`` attribute and be constructible as
            ``block(inplanes, planes, stride, downsample, dcn=...)``.
        layers: Indexable of four ints; ``layers[i]`` is the number of
            blocks in stage ``i + 1``.
        in_channels: Number of channels of the input image (default 3).
        dcn: Optional deformable-convolution configuration. It is passed
            through unchanged to the blocks of stages 2-4; ``None``
            disables it. Its schema is defined by the block classes.
    """
    # Initialise the base layer before attaching any state to it.
    super(ResNet, self).__init__()
    self.dcn = dcn
    # Running input-channel count; updated by every _make_layer call.
    self.inplanes = 64
    # Output channel count of each stage, appended by _make_layer.
    self.out_channels = []

    # Stem: 7x7 convolution with stride 2, no bias (followed by batch norm).
    self.conv1 = nn.Conv2D(
        in_channels,
        64,
        kernel_size=7,
        stride=2,
        padding=3,
        bias_attr=False)
    # Batch normalisation over the 64 stem channels.
    self.bn1 = BatchNorm2d(64, momentum=0.1)
    self.relu = nn.ReLU()
    # 3x3 max pooling with stride 2 and padding 1.
    self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

    # Stage 1 keeps the default stride of 1 and never receives the DCN config.
    self.layer1 = self._make_layer(block, 64, layers[0])
    # Stages 2-4 each use stride 2 (downsampling) and receive the DCN config.
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)

    # When DCN is enabled, zero-initialise the offset branch of every block
    # that has one.
    if self.dcn is not None:
        # paddle.nn.Layer exposes sublayers(); the PyTorch-style modules()
        # used previously is not part of its API and raised AttributeError.
        for m in self.sublayers():
            if isinstance(m, (Bottleneck, BasicBlock)) and hasattr(
                    m, 'conv2_offset'):
                constant_init(m.conv2_offset, 0)
def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
    """Assemble one ResNet stage as a sequence of residual blocks.

    Args:
        block: Residual block class; must expose ``expansion``.
        planes: Base channel width of the stage (before expansion).
        blocks: Number of residual blocks in the stage.
        stride: Stride of the first block (and of the shortcut projection).
        dcn: Configuration forwarded unchanged to every block.

    Returns:
        An ``nn.Sequential`` holding the stage's blocks in order.

    Side effects:
        Sets ``self.inplanes`` to the stage's output channel count and
        appends that count to ``self.out_channels``.
    """
    expanded = planes * block.expansion

    # A projection shortcut (1x1 conv + batch norm) is only needed when the
    # first block changes the spatial stride or the channel count.
    shortcut = None
    if not (stride == 1 and self.inplanes == expanded):
        shortcut = nn.Sequential(
            nn.Conv2D(
                self.inplanes,
                expanded,
                kernel_size=1,
                stride=stride,
                bias_attr=False),
            BatchNorm2d(expanded, momentum=0.1),
        )

    # The first block carries the stride and the optional shortcut.
    stage = [block(self.inplanes, planes, stride, shortcut, dcn=dcn)]
    self.inplanes = expanded
    # The remaining blocks use the block's default stride and no shortcut.
    stage.extend(
        block(self.inplanes, planes, dcn=dcn) for _ in range(blocks - 1))

    self.out_channels.append(expanded)
    return nn.Sequential(*stage)
def forward(self, x):
# 定义前向传播