小目标检测作为目标检测研究的重要方向,在智能安防、自动驾驶、医学图像分析以及遥感监测等场景中具有广泛的应用价值[12]。然而小目标通常具有尺度小、纹理弱、易被下采样过程丢失等问题,其技术挑战仍然存在。
Ren 等 [13] 提出的 Faster R-CNN 首次尝试在不同层级特征图上进行检测,但其特征传递机制并未针对小目标进行优化。随后,Lin 等 [14] 提出的 FPN(Feature Pyramid Network)通过自上而下的横向连接与上采样操作,将高层语义特征与低层高分辨率特征融合,显著提升了小目标的检测精度,成为后续多尺度检测方法的核心组件。在此基础上,Liu 等 [15] 提出的 PANet 增加了自下而上的路径聚合通路,进一步强化了低层细节特征向高层的传递,有效改善了小目标的表征能力。与此同时,NAS-FPN [16]、BiFPN [17] 等改进型特征金字塔结构通过自动搜索或加权融合策略,实现了更高效的多尺度特征交互,逐渐成为小目标检测模型的核心组件。
近年来,YOLO 系列作为单阶段检测器的代表,因其高效性在实时目标检测中广泛应用 [18]。早期版本的 YOLO 在小目标检测中存在精度不足的问题,主要受限于下采样造成的特征损失 [19]。为此,YOLOv3 引入多尺度预测机制,在不同尺度特征图上同时检测目标 [20];YOLOv4 与 YOLOv5 融合 CSPNet、PANet 等结构,进一步增强特征表达能力。最新的 YOLOv7 与 YOLOv8 在轻量化与检测精度之间取得平衡,并通过引入更深层次的特征融合与训练策略,使小目标检测性能得到持续提升 [21]。这些改进使 YOLO 系列不仅保持了实时性优势,也逐步缩小了在小目标检测任务中与双阶段检测器的差距。请你作为目标检测专家,润色我写的相关工作部分,并给出润色后的参考文献,降重。下面是两个模块的代码:

import torch.nn as nn
import torch
import torch.nn.functional as F
class ChannelAttention(nn.Module):
    """Channel attention (CBAM-style): reweights channels of the input.

    Average- and max-pooled channel descriptors are passed through a shared
    bottleneck MLP; the summed logits are squashed with a sigmoid and used to
    gate the input feature map channel-wise.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        # Global 1x1 descriptors per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared bottleneck MLP implemented with 1x1 convolutions.
        self.fc = nn.Sequential(
            nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Fuse both pooled descriptors through the shared MLP, then gate x.
        attn = self.fc(self.avg_pool(x)) + self.fc(self.max_pool(x))
        return x * self.sigmoid(attn)
class HaarDownsampling(nn.Module):
    """2x downsampling via a fixed (non-trainable) Haar wavelet transform.

    Each input channel is decomposed into four sub-bands (LL, HL, LH, HH)
    by a grouped stride-2 convolution, and the resulting 4*in_ch channels
    are reweighted with channel attention.
    """

    def __init__(self, in_ch):
        super().__init__()
        # The four 2x2 Haar analysis filters, stacked as [4, 2, 2].
        bank = torch.tensor(
            [
                [[1.0, 1.0], [1.0, 1.0]],     # LL: local average
                [[1.0, -1.0], [1.0, -1.0]],   # HL: horizontal detail
                [[1.0, 1.0], [-1.0, -1.0]],   # LH: vertical detail
                [[1.0, -1.0], [-1.0, 1.0]],   # HH: diagonal detail
            ]
        ) / 4.0
        # Tile once per input channel -> [4*in_ch, 1, 2, 2]; with
        # groups=in_ch the output channel order is
        # [ch0:LL,HL,LH,HH, ch1:LL,HL,LH,HH, ...].
        weight = bank.unsqueeze(1).repeat(in_ch, 1, 1, 1)
        # Registered as a buffer, not a Parameter: the kernels stay fixed.
        self.register_buffer('weight', weight)
        self.groups = in_ch
        # Channel attention over all sub-band channels.
        self.ca = ChannelAttention(in_ch * 4)

    def forward(self, x):
        # Grouped stride-2 convolution realizes the per-channel Haar DWT.
        subbands = F.conv2d(x, self.weight, stride=2, groups=self.groups)
        return self.ca(subbands)
class StableDWTBlock(nn.Module):
    """Wavelet downsampling block.

    Combines Haar sub-band features with a depthwise-conv residual path,
    fuses them, and refines the result with a spatial attention gate.
    Halves the spatial resolution and maps c1 -> c2 channels.
    """

    def __init__(self, c1, c2, *args, **kwargs):
        super().__init__()
        self.c1 = c1
        self.c2 = c2
        # Haar transform: 4*c1 channels at half resolution.
        self.haar = HaarDownsampling(c1)
        # Residual path: depthwise stride-2 conv + pointwise conv keeps a
        # learnable view of the original features at the new resolution.
        self.residual = nn.Sequential(
            nn.Conv2d(c1, c1, kernel_size=3, stride=2, padding=1, groups=c1),
            nn.Conv2d(c1, c1, kernel_size=1),
            nn.BatchNorm2d(c1),
            nn.SiLU(inplace=True),
        )
        # Fuses the 4*c1 wavelet channels with the c1 residual channels.
        self.feature_fusion = nn.Sequential(
            nn.Conv2d(c1 * 5, c2, kernel_size=1),
            nn.BatchNorm2d(c2),
            nn.SiLU(inplace=True),
            nn.Conv2d(c2, c2, kernel_size=3, padding=1),
            nn.BatchNorm2d(c2),
            nn.SiLU(inplace=True),
        )
        # Single-channel spatial gate in [0, 1].
        self.spatial_att = nn.Sequential(
            nn.Conv2d(c2, 1, kernel_size=1),
            nn.Sigmoid(),
        )
        # Final pointwise projection.
        self.output_conv = nn.Conv2d(c2, c2, kernel_size=1)

    def forward(self, x):
        # Wavelet branch: [B, 4*c1, H/2, W/2], ordered per input channel.
        wavelet = self.haar(x)
        b, _, h, w = wavelet.shape
        # Regroup channels from per-channel [LL,HL,LH,HH, ...] order to
        # sub-band-major order [all LL, all HL, all LH, all HH].
        wavelet = (
            wavelet.view(b, -1, 4, h, w)
            .permute(0, 2, 1, 3, 4)
            .reshape(b, -1, h, w)
        )
        # Residual branch at the same (halved) resolution.
        shortcut = self.residual(x)
        # Concatenate -> [B, 5*c1, H/2, W/2], then fuse down to c2 channels.
        fused = self.feature_fusion(torch.cat([wavelet, shortcut], dim=1))
        # Spatial gating followed by the output projection.
        gate = self.spatial_att(fused)
        return self.output_conv(fused * gate)
class DWTBlock(nn.Module):
    """Progressive wavelet downsampling block.

    Blends a plain stride-2 convolution with the wavelet path through a
    learnable scalar gate, letting training decide the mixing ratio.
    """

    def __init__(self, c1, c2, *args, **kwargs):
        super().__init__()
        self.c1 = c1
        self.c2 = c2
        # Conventional stride-2 convolutional downsampling branch.
        self.conv_down = nn.Sequential(
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(c2),
            nn.SiLU(inplace=True),
        )
        # Wavelet downsampling branch.
        self.dwt_block = StableDWTBlock(c1, c2)
        # Learnable mixing logit; sigmoid(0) = 0.5, so both branches start
        # with equal weight and the balance is learned during training.
        self.alpha = nn.Parameter(torch.tensor(0.0))

    def forward(self, x):
        # Convex combination of the two downsampling paths.
        w = torch.sigmoid(self.alpha)
        return w * self.dwt_block(x) + (1 - w) * self.conv_down(x)
class VGCA(nn.Module):
    """Variance-guided channel attention with a residual connection.

    The per-channel spatial variance drives a gating MLP whose output
    modulates a depthwise spatially-enhanced feature map; the modulated
    features are added back to the input and projected by a 1x1 convolution.

    Args:
        channels: number of input (and output) channels.
        reduction: bottleneck ratio of the gating MLP.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        self.channels = channels
        # Variance path: channel statistic -> bottleneck MLP -> sigmoid gate.
        self.var_gate = nn.Sequential(
            nn.Linear(channels, channels // reduction),
            nn.ReLU(),
            nn.Linear(channels // reduction, channels),
            nn.Sigmoid()
        )
        # Spatial path: lightweight depthwise enhancement.
        self.spatial = nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
            nn.BatchNorm2d(channels)
        )
        # 1x1 adjustment applied after the residual addition.
        self.res_conv = nn.Conv2d(channels, channels, kernel_size=1)

    def forward(self, x):
        b, c, h, w = x.shape
        identity = x  # kept for the residual connection
        if h > 1 and w > 1:
            # Channel variance over the spatial dims; log1p compresses the
            # dynamic range and is numerically stable (variance >= 0).
            channel_stat = torch.log1p(
                x.var(dim=(2, 3), keepdim=True, unbiased=False)
            )
        else:
            # Variance is degenerate on 1x1 maps; fall back to the mean.
            # Clamp to >= 0 before log1p: channel means can be negative,
            # and log(1 + mean) would produce NaN for mean <= -1.
            channel_stat = torch.log1p(
                x.mean(dim=(2, 3), keepdim=True).clamp(min=0)
            )
        # Gate weights [b, c] reshaped to broadcastable [b, c, 1, 1].
        gate = self.var_gate(channel_stat.view(b, c)).view(b, c, 1, 1)
        # Variance-guided modulation of the spatially enhanced features.
        modulated = gate * self.spatial(x)
        # Residual connection followed by the 1x1 adjustment.
        return self.res_conv(identity + modulated)
def init_param_one(param):
    """Fill *param* with the constant 0.1; a None param is a no-op."""
    if param is None:
        return
    nn.init.constant_(param, 0.1)
class AMSPP(nn.Module):
    """Attention-enhanced multi-pool SPP variant.

    Splits the channel-reduced features into two halves, runs three cascaded
    max-pool stages on one half and three cascaded avg-pool stages on the
    other, subtracts an alpha-weighted sum of the earlier stages from the
    deepest stage of each cascade, concatenates all eight tensors, and
    refines the result with variance-guided channel attention.
    """

    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = GhostConv(c1, c_, 1, 1)
        self.cv2 = GhostConv(c_ * 4, c2, 1, 1)
        self.cv = GhostConv(c2, c2, 1, 1)
        # Stride-1 pooling with "same" padding keeps the spatial size.
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
        self.a = nn.AvgPool2d(kernel_size=k, stride=1, padding=k // 2)
        self.act = nn.Sigmoid()
        self.A3d = VGCA(channels=c2)
        self.norm = nn.BatchNorm2d(c2)
        self.s = nn.SiLU()
        # Learnable weight for the cross-stage correction term.
        self.alpha = torch.nn.Parameter(torch.Tensor([0.1]))
        self.reset_parameters()

    def reset_parameters(self):
        init_param_one(self.alpha)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.cv1(x)
        # Two halves: a max-pool branch and an avg-pool branch.
        x1, x2 = torch.split(x, x.size(1) // 2, dim=1)
        m1 = self.m(x1)
        m2 = self.m(m1)
        # Deepest stage minus the alpha-weighted shallower stages.
        m3 = self.m(m2) - self.alpha * (m1 + m2)
        a1 = self.a(x2)
        a2 = self.a(a1)
        a3 = self.a(a2) - self.alpha * (a1 + a2)
        z = self.cv2(torch.cat((x1, m1, m2, m3, x2, a1, a2, a3), dim=1))
        return self.s(self.norm(self.cv(self.A3d(z))))