Convolution
Code
import torch
import torch.nn as nn


class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        if in_planes <= 0 or out_planes <= 0:
            raise ValueError("Input and output channels must be greater than zero")
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class Test(nn.Module):
    def __init__(self):
        super(Test, self).__init__()
        # Branch 1: a plain 3x3 convolution (stride=1, padding=1, dilation=1) that preserves spatial size
        self.branch1 = nn.Sequential(
            BasicConv(3, 3, kernel_size=3, stride=1, padding=1, dilation=1),
        )

    def forward(self, x):
        x1 = self.branch1(x)
        return x1


# Example usage
if __name__ == "__main__":
    inputs = torch.randn(1, 3, 608, 608)  # Example input tensor (batch_size, channels, height, width)
    # net = BasicRFB(in_planes=3, out_planes=3, stride=4)  # Use stride=4 for aggressive downsampling
    # out = net(inputs)
    net = Test()
    out = net(inputs)
    print("Input shape:", inputs.shape)
    print("Output shape:", out.shape)
padding (when stride=1, padding=kernel_size//2 keeps the original size)
BasicConv(3, 3, kernel_size=3, stride=1, padding=1, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 608, 608])
BasicConv(3, 3, kernel_size=3, stride=1, padding=2, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 610, 610])
BasicConv(3, 3, kernel_size=3, stride=1, padding=3, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 612, 612])
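A quick check, reusing BasicConv from the code above: with stride=1, any odd kernel size keeps the input size as long as padding=kernel_size//2 (a sketch, not part of the original script):

# stride=1: padding = kernel_size // 2 preserves 608x608 for any odd kernel
for k in (3, 5, 7):
    net = BasicConv(3, 3, kernel_size=k, stride=1, padding=k // 2)
    out = net(torch.randn(1, 3, 608, 608))
    print(k, out.shape)  # all print torch.Size([1, 3, 608, 608])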
padding (when stride=2, padding=kernel_size//2 halves the size exactly)
BasicConv(3, 3, kernel_size=3, stride=2, padding=1, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 304, 304])
BasicConv(3, 3, kernel_size=3, stride=2, padding=2, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 305, 305])
BasicConv(3, 3, kernel_size=3, stride=2, padding=3, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 306, 306])
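With padding=kernel_size//2 and stride=2, the output works out to ceil(H/2), so an even input halves exactly. A sketch using the conv2d_out_size helper defined above:

# stride=2, padding = k//2: output is ceil(H / 2), so even inputs halve exactly
for h in (608, 607):
    print(h, conv2d_out_size(h, kernel_size=3, stride=2, padding=1))  # 608 -> 304, 607 -> 304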
padding (when stride=3, padding=kernel_size//2 shrinks the size to ceil(H/3))
BasicConv(3, 3, kernel_size=3, stride=3, padding=1, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 203, 203])
BasicConv(3, 3, kernel_size=3, stride=3, padding=2, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 204, 204])
BasicConv(3, 3, kernel_size=3, stride=3, padding=3, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 204, 204])
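More generally (our observation, easy to verify with the helper above): with an odd kernel_size and padding=kernel_size//2, the output is ceil(H/stride) for any stride.

import math

# With odd kernel_size and padding = kernel_size // 2, output = ceil(H / stride)
for s in (1, 2, 3, 4):
    out = conv2d_out_size(608, kernel_size=3, stride=s, padding=1)
    assert out == math.ceil(608 / s)
    print(s, out)  # 608, 304, 203, 152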
dilation: the dilation rate, i.e. the spacing between sampled positions inside the kernel. The effective kernel size becomes dilation*(kernel_size-1)+1.
BasicConv(3, 3, kernel_size=3, stride=4, padding=1, dilation=1),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 152, 152])
BasicConv(3, 3, kernel_size=3, stride=4, padding=1, dilation=2),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 152, 152])
BasicConv(3, 3, kernel_size=4, stride=4, padding=2, dilation=2),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 152, 152])
BasicConv(3, 3, kernel_size=4, stride=4, padding=3, dilation=3),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 152, 152])
BasicConv(3, 3, kernel_size=4, stride=4, padding=6, dilation=5),
Input shape: torch.Size([1, 3, 608, 608])
Output shape: torch.Size([1, 3, 152, 152])
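All five configurations above land at 152×152 because padding is chosen to offset the effective kernel size k_eff = dilation*(kernel_size-1)+1; a sketch using the conv2d_out_size helper from above:

# Effective kernel size: k_eff = dilation * (kernel_size - 1) + 1
configs = [(3, 1, 1), (3, 1, 2), (4, 2, 2), (4, 3, 3), (4, 6, 5)]  # (kernel_size, padding, dilation)
for k, p, d in configs:
    k_eff = d * (k - 1) + 1
    out = conv2d_out_size(608, kernel_size=k, stride=4, padding=p, dilation=d)
    print(f"k={k} p={p} d={d} k_eff={k_eff} -> {out}")  # all -> 152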
Summary
stride controls the overall scaling factor of the feature map.
kernel_size controls the size of the convolution kernel and is the main term in the size formula.
padding makes small adjustments to how many positions the kernel visits, fine-tuning the final output size.
dilation controls the kernel's receptive field; combined with padding, it yields different receptive fields at the same output scale.