class EUCB(nn.Module):
    """Up-sampling convolution block (EUCB).

    The block chains three steps:

    1. ``up_dwc``: ``nn.Upsample(scale_factor=2)`` followed by the project's
       ``Conv`` built with ``g=in_channels`` (one group per channel).
    2. A channel shuffle called with ``groups=in_channels``.
    3. ``pwc``: a 1x1 ``nn.Conv2d`` with bias.

    ``out_channels`` is always set equal to ``in_channels``.

    Args:
        in_channels: Number of input channels (also used as the output width).
        kernel_size: Kernel size handed to the grouped ``Conv``.
        stride: Stride handed to the grouped ``Conv`` (``s=stride``).
    """

    def __init__(self, in_channels, kernel_size=3, stride=1):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = in_channels
        self.up_dwc = nn.Sequential(
            nn.Upsample(scale_factor=2),
            Conv(self.in_channels, self.in_channels, kernel_size, g=self.in_channels, s=stride),
        )
        self.pwc = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0, bias=True)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Upsample ``x``, shuffle its channels and apply the 1x1 convolution."""
        x = self.up_dwc(x)
        # NOTE(review): when the tensor has exactly ``in_channels`` channels, each
        # group holds a single channel and the shuffle leaves the order unchanged.
        x = self.channel_shuffle(x, self.in_channels)
        return self.pwc(x)

    def channel_shuffle(self, x: torch.Tensor, groups: int) -> torch.Tensor:
        """Interleave the channels of a 4-D tensor across ``groups`` groups.

        The channel axis is split into ``(groups, channels // groups)``, the two
        axes are swapped, and the result is flattened back to four dimensions.
        """
        # Read the shape from the tensor itself; going through ``.data`` is a
        # legacy detour that is not needed to inspect sizes.
        batchsize, num_channels, height, width = x.shape
        channels_per_group = num_channels // groups
        x = x.view(batchsize, groups, channels_per_group, height, width)
        x = torch.transpose(x, 1, 2).contiguous()
        return x.view(batchsize, -1, height, width)
# Multi-scale depth-wise convolution (MSDC)
class MSDC(nn.Module):
    """Multi-scale depth-wise convolution (MSDC).

    Builds one branch per entry of ``kernel_sizes``; every branch is the
    project's ``Conv`` with ``g=in_channels`` and the shared ``stride``.

    Args:
        in_channels: Channel count of the input; each branch keeps this width.
        kernel_sizes: Iterable of kernel sizes, one branch per entry.
        stride: Stride passed to every branch.
        dw_parallel: If truthy, every branch sees the same input. Otherwise the
            branches run in sequence and each branch output is added to the
            input of the next one.
    """

    def __init__(self, in_channels, kernel_sizes, stride, dw_parallel=True):
        super().__init__()
        self.in_channels = in_channels
        self.kernel_sizes = kernel_sizes
        self.dw_parallel = dw_parallel
        self.dwconvs = nn.ModuleList([
            nn.Sequential(
                Conv(self.in_channels, self.in_channels, kernel_size, s=stride, g=self.in_channels)
            )
            for kernel_size in self.kernel_sizes
        ])

    def forward(self, x):
        """Return a list holding the output of every branch, in branch order."""
        outputs = []
        for dwconv in self.dwconvs:
            dw_out = dwconv(x)
            outputs.append(dw_out)
            if not self.dw_parallel:
                # Sequential mode: the next branch receives input + current output.
                # NOTE(review): this addition needs ``dw_out`` to have the same
                # shape as ``x``; presumably that does not hold for stride 2 - confirm.
                x = x + dw_out
        return outputs
class MSCB(nn.Module):
    """Multi-scale convolution block (MSCB).

    Data flow of ``forward``:

    1. ``pconv1``: 1x1 ``Conv`` expanding to ``int(in_channels * expansion_factor)``.
    2. ``msdc``: an ``MSDC`` producing one tensor per kernel size.
    3. Merge: element-wise sum when ``add`` is truthy, otherwise concatenation
       along the channel axis.
    4. Channel shuffle with ``gcd(combined_channels, out_channels)`` groups.
    5. ``pconv2``: 1x1 ``Conv`` (built with ``act=False``) down to ``out_channels``.
    6. When ``stride == 1`` the block input is added to the result; if the channel
       counts differ the input first goes through a bias-free 1x1 ``nn.Conv2d``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        kernel_sizes: Kernel sizes of the depth-wise branches.
        stride: Must be 1 or 2 (asserted); forwarded to ``MSDC``.
        expansion_factor: Multiplier for the hidden width.
        dw_parallel: Forwarded to ``MSDC``.
        add: Selects sum (truthy) or concatenation (falsy) when merging branches.
    """

    def __init__(self, in_channels, out_channels, kernel_sizes=(1, 3, 5), stride=1, expansion_factor=2,
                 dw_parallel=True, add=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        # Keep a private list so instances never alias a shared default or the
        # caller's own sequence.
        self.kernel_sizes = list(kernel_sizes)
        self.expansion_factor = expansion_factor
        self.dw_parallel = dw_parallel
        self.add = add
        self.n_scales = len(self.kernel_sizes)
        # check stride value
        assert self.stride in [1, 2]
        # The residual connection is only used when the spatial size is kept.
        self.use_skip_connection = self.stride == 1
        # expansion factor
        self.ex_channels = int(self.in_channels * self.expansion_factor)
        self.pconv1 = nn.Sequential(
            # pointwise convolution
            Conv(self.in_channels, self.ex_channels, 1)
        )
        self.msdc = MSDC(self.ex_channels, self.kernel_sizes, self.stride, dw_parallel=self.dw_parallel)
        # Summing keeps the hidden width; concatenating multiplies it by the
        # number of branches.
        if self.add:
            self.combined_channels = self.ex_channels
        else:
            self.combined_channels = self.ex_channels * self.n_scales
        self.pconv2 = nn.Sequential(
            # pointwise convolution
            Conv(self.combined_channels, self.out_channels, 1, act=False)
        )
        if self.use_skip_connection and (self.in_channels != self.out_channels):
            self.conv1x1 = nn.Conv2d(self.in_channels, self.out_channels, 1, 1, 0, bias=False)

    def forward(self, x):
        """Run the block on ``x`` and return the (optionally residual) output."""
        pout1 = self.pconv1(x)
        msdc_outs = self.msdc(pout1)
        if self.add:
            # ``sum`` starts from 0, matching an explicit ``0 + t1 + t2 + ...``.
            dout = sum(msdc_outs)
        else:
            dout = torch.cat(msdc_outs, dim=1)
        dout = self.channel_shuffle(dout, math.gcd(self.combined_channels, self.out_channels))
        out = self.pconv2(dout)
        if not self.use_skip_connection:
            return out
        if self.in_channels != self.out_channels:
            # Match the residual's channel count to the block output.
            x = self.conv1x1(x)
        return x + out

    def channel_shuffle(self, x, groups):
        """Interleave the channels of a 4-D tensor across ``groups`` groups.

        The channel axis is split into ``(groups, channels // groups)``, the two
        axes are swapped, and the result is flattened back to four dimensions.
        """
        # Read the shape from the tensor itself; ``.data`` is a legacy detour.
        batchsize, num_channels, height, width = x.shape
        channels_per_group = num_channels // groups
        x = x.view(batchsize, groups, channels_per_group, height, width)
        x = torch.transpose(x, 1, 2).contiguous()
        return x.view(batchsize, -1, height, width)
class CSP_MSCB(C2f):
    """``C2f`` variant whose inner module list is made of ``MSCB`` blocks.

    ``C2f.__init__`` is called with ``(c1, c2, n, shortcut, g, e)``; afterwards
    ``self.m`` is replaced by ``n`` ``MSCB(self.c, self.c)`` blocks. ``self.c``
    is read after the parent constructor runs, so it is presumably the hidden
    width that ``C2f`` sets up - confirm against the ``C2f`` definition.

    Args:
        c1: Forwarded to ``C2f`` as its first argument.
        c2: Forwarded to ``C2f`` as its second argument.
        n: Number of ``MSCB`` blocks placed in ``self.m``.
        kernel_sizes: Kernel sizes handed to every ``MSCB``.
        shortcut: Forwarded to ``C2f`` only; the ``MSCB`` blocks do not receive it.
        g: Forwarded to ``C2f``.
        e: Forwarded to ``C2f``.
    """

    def __init__(self, c1, c2, n=1, kernel_sizes=(1, 3, 5), shortcut=False, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        # Give every MSCB its own list so the blocks never share (or mutate) a
        # common default / caller-owned sequence.
        self.m = nn.ModuleList(MSCB(self.c, self.c, kernel_sizes=list(kernel_sizes)) for _ in range(n))


# Author's note: I use the following model configuration to improve accuracy.
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  # NOTE(review): the layer/parameter/GFLOPs summaries below look like the figures of the stock
  # yolo11.yaml; they have presumably not been re-measured for this modified head - confirm.
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# Custom head settings. These names are used as placeholders inside the head's module/args
# columns below (presumably substituted by a customised model parser - confirm).
fusion_mode: bifpn
node_mode: CSP_MSCB
head_channel: 256

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]] # 2
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]] # 4
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]] # 6
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]] # 8
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# YOLO11n head
head:
  - [4, 1, Conv, [head_channel]] # 11-P3/8
  - [6, 1, Conv, [head_channel]] # 12-P4/16
  - [10, 1, Conv, [head_channel]] # 13-P5/32

  - [12, 1, Conv, [head_channel, 3, 2]] # 14-P5/32
  - [[-1, 13], 1, Fusion, [fusion_mode]] # 15 (inputs: 14, 13)
  - [-1, 3, node_mode, [head_channel, [5,7,9]]] # 16-P5/32

  - [-1, 1, EUCB, []] # 17-P4/16
  - [11, 1, Conv, [head_channel, 3, 2]] # 18-P4/16
  - [[-1, -2, 12], 1, Fusion, [fusion_mode]] # 19 (inputs: 18, 17, 12)
  - [-1, 3, node_mode, [head_channel, [3,5,7]]] # 20-P4/16

  - [-1, 1, EUCB, []] # 21-P3/8
  - [2, 1, Conv, [head_channel, 3, 2]] # 22-P3/8
  - [[-1, -2, 11], 1, Fusion, [fusion_mode]] # 23 (inputs: 22, 21, 11)
  - [-1, 3, node_mode, [head_channel, [1,3,5]]] # 24-P3/8

  - [[21, -1], 1, Fusion, [fusion_mode]] # 25 (inputs: 21, 24)
  - [-1, 3, node_mode, [head_channel, [1,3,5]]] # 26-P3/8

  - [24, 1, Conv, [head_channel, 3, 2]] # 27-P4/16
  - [26, 1, Conv, [head_channel, 3, 2]] # 28-P4/16
  - [[-1, -2, 20, 17], 1, Fusion, [fusion_mode]] # 29-P4/16 (inputs: 28, 27, 20, 17)
  - [-1, 3, node_mode, [head_channel, [3,5,7]]] # 30-P4/16

  - [20, 1, Conv, [head_channel, 3, 2]] # 31-P5/32
  - [30, 1, Conv, [head_channel, 3, 2]] # 32-P5/32
  - [[-1, -2, 16], 1, Fusion, [fusion_mode]] # 33-P5/32 (inputs: 32, 31, 16)
  - [-1, 3, node_mode, [head_channel, [5,7,9]]] # 34-P5/32

  - [[26, 30, 34], 1, Detect, [nc]] # Detect(P3, P4, P5)