Overall structure of EfficientNet-B0, where Conv = (Conv + BN + Swish). If a module is repeated 2 or more times, the listed stride=2 applies only to the first repetition; every subsequent repetition uses stride=1 (see the short sketch below the table).
+-------+--------------------------+------------+--------------+---------+--------+
| Stage | Module | input_size | out_channels | repeats | stride |
+-------+--------------------------+------------+--------------+---------+--------+
| 1 | Conv(3x3) | 224x224 | 32 | 1 | 2 |
| 2 | MBConv1,k3x3 | 112x112 | 16 | 1 | 1 |
| 3 | MBConv6,k3x3 | 112x112 | 24 | 2 | 2 |
| 4 | MBConv6,k5x5 | 56x56 | 40 | 2 | 2 |
| 5 | MBConv6,k3x3 | 28x28 | 80 | 3 | 2 |
| 6 | MBConv6,k5x5 | 14x14 | 112 | 3 | 1 |
| 7 | MBConv6,k5x5 | 14x14 | 192 | 4 | 2 |
| 8 | MBConv6,k3x3 | 7x7 | 320 | 1 | 1 |
| 9 | Conv(1x1) & Pooling & FC | 7x7 | 1280 | 1 | None |
+-------+--------------------------+------------+--------------+---------+--------+
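A minimal sketch of the stride rule above (stage_strides is a hypothetical helper, not part of the network code):
# Only the first block of a repeated stage uses the stage stride; the rest use stride=1.
def stage_strides(stage_stride, repeats):
    return [stage_stride if i == 0 else 1 for i in range(repeats)]

print(stage_strides(2, 3))  # [2, 1, 1], e.g. Stage 5 (MBConv6, k3x3, repeats=3, stride=2)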
MBConv Module
MBConv6: the first 1x1 expansion convolution has 6 times as many filters as the input feature map has channels.
MBConv1: the first 1x1 expansion convolution is omitted, i.e. the MBConv block in Stage 2 has no 1x1 expansion layer.
shortcut: the residual connection exists if and only if the input and output of the MBConv block have the same shape (same channels and stride = 1).
MBConv: the mobile inverted bottleneck conv introduced in MobileNetV2/MnasNet, here with an SE block and Swish activation (as in MobileNetV3).
image ---> Conv(1x1, cin->cin*n) --> Depthwise --> SE --> ConvBN(1x1, cin*n->cout) --> stochastic depth --> + ---> output
  |                expand                                        project                                    |
  |                                                                                                         |
  -----------------------------------------------------------------------------------------------------------
The expansion conv is followed by BN + Swish, the depthwise conv is followed by BN + Swish, and the projection conv is followed by BN only (no activation).
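A small plain-Python illustration of these rules (mbconv_summary is a hypothetical helper, no layers are built): hidden channels = in_channels * expand_ratio, MBConv1 skips the expansion conv, and the shortcut exists only when in_channels == out_channels and stride == 1.
def mbconv_summary(in_c, out_c, expand_ratio, stride):
    hidden_dim = in_c * expand_ratio                  # channels after the 1x1 expansion
    has_expand = expand_ratio != 1                    # MBConv1 has no expansion conv
    has_shortcut = (in_c == out_c) and (stride == 1)  # shapes must match for the residual add
    return hidden_dim, has_expand, has_shortcut

print(mbconv_summary(16, 24, 6, 2))  # (96, True, False)  -> first block of Stage 3
print(mbconv_summary(24, 24, 6, 1))  # (144, True, True)  -> second block of Stage 3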
Configuration parameters
import torch
import torch.nn as nn
from math import ceil
base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16,  1, 1, 3],
    [6, 24,  2, 2, 3],
    [6, 40,  2, 2, 5],
    [6, 80,  3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]
phi_values = {
    # version: (phi_value, resolution, drop_rate)
    # compound scaling: depth = alpha ** phi, width = beta ** phi, resolution ~ gamma ** phi
    "b0": (0, 224, 0.2),
"b1": (0.5, 240, 0.2),
"b2": (1, 260, 0.3),
"b3": (2, 300, 0.3),
"b4": (3, 380, 0.4),
"b5": (4, 456, 0.4),
"b6": (5, 528, 0.5),
"b7": (6, 600, 0.5),
}
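A worked example of the compound-scaling relations behind these values (a sketch; alpha = 1.2 and beta = 1.1 are the paper's depth/width coefficients, the same defaults used in calculate_factors below; the resolution is read directly from the table):
alpha, beta = 1.2, 1.1
for version, (phi, res, drop_rate) in phi_values.items():
    depth_factor = alpha ** phi   # multiplies the number of block repeats
    width_factor = beta ** phi    # multiplies the channel counts
    print(version, round(depth_factor, 3), round(width_factor, 3), res, drop_rate)
# e.g. b0 -> 1.0 / 1.0 / 224, b2 -> 1.2 / 1.1 / 260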
Convolution + BN + Swish
# CBS: Conv + BN + SiLU
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super(CNNBlock, self).__init__()
        # groups=1: normal conv, groups=in_channels: depthwise conv
        self.cnn = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            groups=groups,
            bias=False,  # no bias needed, BN follows
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()  # SiLU is the same function as Swish
def forward(self,x):
x = self.cnn(x)
x = self.bn(x)
x = self.silu(x)
return x
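A quick shape check (assumed usage, not part of the original code) showing the two roles of groups noted in the comment: groups=1 is a standard convolution, groups=in_channels is a depthwise convolution.
x = torch.randn(1, 32, 112, 112)
normal_conv = CNNBlock(32, 16, kernel_size=3, stride=1, padding=1)            # standard conv
depthwise = CNNBlock(32, 32, kernel_size=3, stride=2, padding=1, groups=32)   # depthwise conv
print(normal_conv(x).shape, depthwise(x).shape)
# torch.Size([1, 16, 112, 112]) torch.Size([1, 32, 56, 56])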
SE attention mechanism
# SE attention
class SqueezeExcitation(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),                 # [B, C, H, W] -> [B, C, 1, 1]
            nn.Conv2d(in_channels, reduced_dim, 1),  # [B, C, 1, 1] -> [B, reduced_dim, 1, 1]
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),  # [B, reduced_dim, 1, 1] -> [B, C, 1, 1]
            nn.Sigmoid(),                            # channel attention weights in (0, 1)
        )
def forward(self,x):
return x * self.se(x) # [B,C,H,W] * [B,C,1,1]
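A small usage sketch (assumed values): the SE branch squeezes to [B, C, 1, 1], produces channel weights in (0, 1), and rescales the input without changing its shape.
x = torch.randn(2, 64, 28, 28)
se = SqueezeExcitation(in_channels=64, reduced_dim=16)  # reduced_dim = in_channels / reduction
print(se(x).shape)  # torch.Size([2, 64, 28, 28])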
MBConv
class InvertedResidualBlock(nn.Module):
def __init__(self,
in_channels, out_channels, kernel_size,stride, padding,
expand_ratio,
reduction = 4, # squeeze excitation
survival_prob = 0.8 # for stochastic depth
) :
super(InvertedResidualBlock, self).__init__()
        self.survival_prob = survival_prob
self.use_residual = in_channels == out_channels and stride == 1
hidden_dim = in_channels * expand_ratio
self.expand = in_channels != hidden_dim
reduced_dim = int(in_channels / reduction)
        # 1x1 expansion conv; when expand_ratio == 1 (MBConv1) this CNNBlock is skipped
        if self.expand:
            self.expand_conv = CNNBlock(
                in_channels, hidden_dim, kernel_size=1, stride=1, padding=0,
            )
self.conv = nn.Sequential(
CNNBlock(
hidden_dim,hidden_dim,kernel_size,stride, padding,groups=hidden_dim
),
SqueezeExcitation(hidden_dim, reduced_dim),
nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
)
    # stochastic depth: during training, randomly drop the entire residual branch for some
    # samples (and rescale the kept ones by 1/survival_prob); it is only applied in blocks
    # that have a shortcut connection
def stochastic_depth(self,x):
if not self.training:
return x
binary_tensor = torch.rand(x.shape[0],1,1,1, device=x.device) < self.survival_prob
return torch.div(x, self.survival_prob) * binary_tensor
def forward(self, inputs):
x = self.expand_conv(inputs) if self.expand else inputs
if self.use_residual:
x = self.conv(x)
x = self.stochastic_depth(x)
x = x + inputs
return x
else :
return self.conv(x)
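A shape/shortcut sketch (assumed usage) matching the first two blocks of Stage 3 in the table: the first block changes channels and resolution, so it has no shortcut; the second keeps both, so the residual add and stochastic depth are active.
x = torch.randn(1, 16, 112, 112)
block1 = InvertedResidualBlock(16, 24, kernel_size=3, stride=2, padding=1, expand_ratio=6)
y = block1(x)
print(block1.use_residual, y.shape)          # False torch.Size([1, 24, 56, 56])
block2 = InvertedResidualBlock(24, 24, kernel_size=3, stride=1, padding=1, expand_ratio=6)
print(block2.use_residual, block2(y).shape)  # True  torch.Size([1, 24, 56, 56])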
EfficientNet
class EfficientNet(nn.Module):
def __init__(self, version, num_classes) :
super(EfficientNet, self).__init__()
width_factor, depth_factor,dropout_rate = self.calculate_factors(version)
last_channels = ceil(1280 * width_factor)
self.pool = nn.AdaptiveAvgPool2d(1)
self.features = self.create_features(width_factor,depth_factor,last_channels)
self.classifier = nn.Sequential(
nn.Dropout(dropout_rate),
nn.Linear(last_channels, num_classes),
)
    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        # alpha (depth) and beta (width) are the compound-scaling coefficients from the EfficientNet paper
phi, res, drop_rate = phi_values[version]
depth_factor = alpha ** phi
width_factor = beta ** phi
return width_factor, depth_factor, drop_rate
def create_features(self, width_factor,depth_factor,last_channels):
channels = int(32 * width_factor)
features = [CNNBlock(3, channels, 3, stride=2, padding=1)]
in_channels = channels
for expand_ratio, channels, repeats, stride, kernel_size in base_model:
out_channels = 4 * ceil( int(channels*width_factor) / 4)
layer_repeats = ceil(repeats * depth_factor)
for layer in range(layer_repeats):
features.append(
InvertedResidualBlock(
in_channels,
out_channels,
expand_ratio = expand_ratio,
stride = stride if layer == 0 else 1,
kernel_size = kernel_size,
padding = kernel_size // 2,
)
)
in_channels = out_channels
features.append(
CNNBlock(in_channels,last_channels,kernel_size=1, stride=1, padding=0)
)
return nn.Sequential( *features )
def forward(self,x):
x = self.features(x)
x = self.pool(x)
x = self.classifier( x.view( x.shape[0], -1 ) )
return x
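How the compound scaling plays out for a larger variant (a sketch, assuming the "b3" entry above with phi = 2): depth_factor = 1.2 ** 2 ≈ 1.44, so a stage with 4 repeats becomes ceil(4 * 1.44) = 6 blocks, and width_factor = 1.1 ** 2 ≈ 1.21, so the 1280 head channels become ceil(1280 * 1.21) = 1549.
model_b3 = EfficientNet(version="b3", num_classes=10)
num_blocks = sum(isinstance(m, InvertedResidualBlock) for m in model_b3.features)
print(num_blocks)  # 26 MBConv blocks, vs. 16 in b0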
test
def test():
device = "cuda" if torch.cuda.is_available() else "cpu"
version = "b0"
phi, res, drop_rate = phi_values[version]
num_examples, num_classes = 4, 10
x = torch.randn( (num_examples, 3, res, res) ).to(device)
model = EfficientNet(version=version, num_classes=num_classes).to(device)
    print(model(x).shape)  # expected: torch.Size([num_examples, num_classes]) = torch.Size([4, 10])
test()