修改1:用LAE替换Conv
0)LSM-YOLO提出的轻量化自适应特征提取(LAE)模块,旨在多尺度、自适应地提取特征,融合全局与局部特征,并降低计算成本。
核心代码(fxlae.py,可复制下面代码):
| import torch import torch.nn as nn from einops import rearrange
__all__ = ['LAE', 'MSFM']
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding that gives 'same'-shaped convolution outputs.

    Args:
        k: Kernel size, either an int or a sequence of ints (one per axis).
        p: Explicit padding. When not None it is returned unchanged.
        d: Dilation factor. Values above 1 enlarge the effective kernel
            before the padding is derived.

    Returns:
        ``p`` if it was given, otherwise half of the (dilated) kernel size:
        an int for an int kernel, a list of ints for a sequence kernel.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # Effective kernel extent once the gaps introduced by dilation count.
        if scalar_kernel:
            k = d * (k - 1) + 1
        else:
            k = [d * (side - 1) + 1 for side in k]
    if p is not None:
        return p
    # Auto-pad: half the (effective) kernel size per axis.
    if scalar_kernel:
        return k // 2
    return [side // 2 for side in k]
class Conv(nn.Module):
    """Convolution block: Conv2d -> BatchNorm2d -> activation.

    Constructor args are (ch_in, ch_out, kernel, stride, padding, groups,
    dilation, activation).
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Build the conv, batch-norm and activation layers.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            k: Kernel size (int or tuple).
            s: Stride.
            p: Padding; when None it is derived by ``autopad(k, p, d)``.
            g: Number of convolution groups.
            d: Dilation.
            act: ``True`` selects ``default_act``; an ``nn.Module`` is used
                as given; anything else disables activation (identity).
        """
        super().__init__()
        padding = autopad(k, p, d)
        # bias=False: the convolution is always followed by BatchNorm2d here.
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, then batch normalization, then activation on ``x``."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Run convolution and activation on ``x``, skipping batch normalization.

        NOTE(review): presumably meant for use after the batch-norm has been
        folded into ``self.conv`` -- confirm against the caller.
        """
        convolved = self.conv(x)
        return self.act(convolved)
class LAE(nn.Module):
    """Light-weight Adaptive Extraction (LAE) downsampling block.

    Two branches are computed from the same input:

    * ``attention``: 3x3 average pooling followed by a 1x1 ``Conv``. Its
      output is regrouped so that each output position holds the four values
      selected by the rearrange pattern, which are normalized with a softmax.
    * ``ds_conv``: a stride-2 grouped 3x3 ``Conv`` producing ``4 * ch``
      channels, regrouped into four candidates per output channel.

    The candidates are combined as an attention-weighted sum, so the channel
    count is preserved while height and width are halved.
    """

    def __init__(self, ch, group=16) -> None:
        """Create the attention and downsampling branches.

        Args:
            ch: Number of input (and output) channels.
            group: Channels per convolution group in the downsampling conv;
                the group count is ``ch // group``, but never below 1.
        """
        super().__init__()

        self.softmax = nn.Softmax(dim=-1)
        self.attention = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            Conv(ch, ch, k=1),
        )

        # ch // group evaluates to 0 when ch < group, and nn.Conv2d rejects a
        # non-positive group count; fall back to an ungrouped convolution.
        groups = max(1, ch // group)
        self.ds_conv = Conv(ch, ch * 4, k=3, s=2, g=groups)

    def forward(self, x):
        """Downsample ``x`` by a factor of 2 along height and width.

        Args:
            x: Tensor of shape (bs, ch, 2*h, 2*w). Height and width must be
                even, because the rearrange pattern splits each by 2.

        Returns:
            Tensor of shape (bs, ch, h, w).
        """
        # bs, ch, 2*h, 2*w => bs, ch, h, w, 4
        att = rearrange(self.attention(x), 'bs ch (s1 h) (s2 w) -> bs ch h w (s1 s2)', s1=2, s2=2)
        att = self.softmax(att)

        # bs, 4 * ch, h, w => bs, ch, h, w, 4
        x = rearrange(self.ds_conv(x), 'bs (s ch) h w -> bs ch h w s', s=4)
        # Attention-weighted sum over the four candidates per position.
        x = torch.sum(x * att, dim=-1)
        return x
class MatchNeck_Inner(nn.Module):
    """Gate a feature map with height-, width- and channel-wise pooled weights.

    The input is average-pooled along the width (one value per row), along the
    height (one value per column) and globally (one value per channel). Each
    pooled descriptor is scaled by a sigmoid-derived weight and then used,
    through another sigmoid, as a multiplicative gate on the input.
    """

    def __init__(self, channels) -> None:
        """Create the pooling layers and convolutions for ``channels`` channels."""
        super().__init__()

        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            Conv(channels, channels),
        )
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        # NOTE(review): conv_hw and conv_pool_hw are registered but never
        # called in forward(). They are kept so the module's parameters and
        # state_dict stay the same -- confirm whether forward() was meant to
        # apply them.
        self.conv_hw = Conv(channels, channels, (3, 1))
        self.conv_pool_hw = Conv(channels, channels, 1)

    def forward(self, x):
        """Return ``x`` multiplied by the three sigmoid gates.

        Args:
            x: 4-D tensor; its last two dimensions are read as height and width.

        Returns:
            Tensor with the same shape as ``x``.
        """
        _, _, height, width = x.size()
        hw_sizes = [height, width]

        rows = self.pool_h(x)                        # one value per row
        cols = self.pool_w(x).permute(0, 1, 3, 2)    # one value per column, moved to dim 2
        channel_desc = self.gap(x)                   # global descriptor through a 1x1 Conv

        # Stack row and column descriptors along dim 2 so they share one sigmoid.
        stacked = torch.cat([rows, cols], dim=2)
        rows, cols = torch.split(stacked, hw_sizes, dim=2)

        stacked_weight = stacked.sigmoid()
        row_weight, col_weight = torch.split(stacked_weight, hw_sizes, dim=2)

        rows = rows * row_weight
        cols = cols * col_weight
        # The channel descriptor is scaled by the mean of all row/column weights.
        channel_desc = channel_desc * torch.mean(stacked_weight, dim=2, keepdim=True)

        return x * rows.sigmoid() * cols.permute(0, 1, 3, 2).sigmoid() * channel_desc.sigmoid()
class MatchNeck(nn.Module):
    """Two stacked ``Conv`` layers followed by ``MatchNeck_Inner``, with an optional residual."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Build the bottleneck.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            shortcut: Request a residual connection; it is only applied when
                ``c1 == c2``.
            g: Group count of the second convolution.
            k: Kernel sizes of the first and second convolution.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_kernel, second_kernel = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_kernel, 1)
        self.cv2 = Conv(hidden, c2, second_kernel, 1, g=g)
        self.add = shortcut and c1 == c2
        self.MN = MatchNeck_Inner(c2)

    def forward(self, x):
        """Apply cv1 -> cv2 -> MN, adding ``x`` back when the shortcut is active."""
        gated = self.MN(self.cv2(self.cv1(x)))
        if self.add:
            return x + gated
        return gated
class MSFM(nn.Module):
    """Split-and-concatenate block built from ``n`` chained ``MatchNeck`` modules.

    The input is projected to ``2 * c`` channels and split into two halves.
    Each ``MatchNeck`` consumes the most recent feature map and its output is
    appended to the list; all ``2 + n`` maps are concatenated on the channel
    axis and fused by a 1x1 ``Conv``.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Build the block.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            n: Number of ``MatchNeck`` modules.
            shortcut: Forwarded to every ``MatchNeck``.
            g: Group count forwarded to every ``MatchNeck``.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv((2 + n) * self.c, c2, 1)
        necks = [MatchNeck(self.c, self.c, shortcut, g, k=(3, 3), e=1.0) for _ in range(n)]
        self.m = nn.ModuleList(necks)

    def forward(self, x):
        """Run the block, splitting the projected features with ``chunk``."""
        branches = list(self.cv1(x).chunk(2, 1))
        for neck in self.m:
            # Each neck is fed the output of the previous stage.
            branches.append(neck(branches[-1]))
        return self.cv2(torch.cat(branches, 1))

    def forward_split(self, x):
        """Run the block, splitting the projected features with ``split``."""
        branches = list(self.cv1(x).split((self.c, self.c), 1))
        for neck in self.m:
            # Each neck is fed the output of the previous stage.
            branches.append(neck(branches[-1]))
        return self.cv2(torch.cat(branches, 1))
if __name__ == "__main__":
    # Smoke test: push one random batch through LAE and print the output size.
    sample_shape = (1, 64, 224, 224)
    sample = torch.rand(sample_shape)

    lae = LAE(64)
    print(lae(sample).size())
执行上面程序,可以看出:(1, 64, 224, 224)→(1, 64, 112, 112),说明通道数不变,h、w各缩小为原来的1/2。
1)将fxlae.py文件放到ultralytics/nn/modules文件夹中。
2)修改ultralytics/nn/modules/__init__.py,导入LAE模块,即增加88行和99行的内容。

3)修改ultralytics/nn/tasks.py,有两处,即增加19行、1038-1039行的内容。


4)以yolo11.yaml文件为基础重新写一个yolo11-fxlae.yaml文件,位置如下:
ultralytics/cfg/models/11/yolo11-fxlae.yaml
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024]
  s: [0.50, 0.50, 1024]
  m: [0.50, 1.00, 512]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.50, 512]

# YOLO11n-fxlae backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, LAE, []] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, LAE, []] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]

最低0.47元/天 解锁文章
3万+

被折叠的 条评论
为什么被折叠?



