本文介绍
为提升 YOLOv8 在目标检测任务中的特征表达能力,本文借鉴 CVPR 2025 论文 FDConv 所提出的 Frequency Dynamic Convolution(FDConv)模块来改进 YOLOv8 的下采样层。针对现有动态卷积(Dynamic Convolution)参数开销大且自适应性受限的问题,FDConv 通过在傅里叶域中学习固定参数预算的权重来缓解上述问题。具体而言,FDConv 将参数划分为基于不同频率的组别,各组别对应互不相交的傅里叶索引,从而在不增加参数成本的前提下构建出频率多样化的权重。此外,FDConv 还提出了两个重要模块:Kernel Spatial Modulation(KSM)和 Frequency Band Modulation(FBM)。KSM 在空间层面动态调整滤波器的频率响应,而 FBM 在频域将权重分解为不同频带,并根据局部内容对其进行动态调制。实验结果如下(本文通过 VOC 数据集验证算法性能,epoch 为 100,batch size 为 32,image size 为 640×640):
| Model | mAP50-95 | mAP50 | run time (h) | params (M) | inference time (ms) |
|---|---|---|---|---|---|
| YOLOv8 | 0.549 | 0.760 | 1.051 | 3.01 | 0.2+0.3(postprocess) |
| YOLO11 | 0.553 | 0.757 | 1.142 | 2.59 | 0.2+0.3(postprocess) |
| yolov8_FDConv | 0.554 | 0.767 | 1.932 | 3.11 | 0.8+0.3(postprocess) |

重要声明:本文改进后的代码可能只是不适用于我所使用的数据集,在其他数据集上仍可能有效。
本文的改进旨在降低将最新研究进展迁移到 YOLO 时的代码迁移难度,从而为对最新研究感兴趣的同学提供参考。
代码迁移
重点内容
步骤一:迁移代码
ultralytics框架的模块代码主要放在ultralytics/nn文件夹下,此处为了与官方代码进行区分,可以新增一个extra_modules文件夹,然后新建文件添加以下代码:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from numpy.linalg import matrix_rank
from torch.utils.checkpoint import checkpoint
from torch import Tensor
import torch.nn.functional as F
import math
__all__ = ['FDConv']
class StarReLU(nn.Module):
    """Squared-ReLU activation with a scalar scale and bias.

    Computes ``scale * relu(x) ** 2 + bias``.

    Args:
        scale_value: Initial value of the multiplicative ``scale`` parameter.
        bias_value: Initial value of the additive ``bias`` parameter.
        scale_learnable: Whether ``scale`` requires gradients.
        bias_learnable: Whether ``bias`` requires gradients.
        mode: Accepted for signature compatibility; not used by this class.
        inplace: Forwarded to the internal ``nn.ReLU``.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0,
                 scale_learnable=True, bias_learnable=True,
                 mode=None, inplace=False):
        super().__init__()
        self.inplace = inplace
        self.relu = nn.ReLU(inplace=inplace)
        # Both parameters are 1-element tensors, so they broadcast over any input.
        self.scale = nn.Parameter(scale_value * torch.ones(1),
                                  requires_grad=scale_learnable)
        self.bias = nn.Parameter(bias_value * torch.ones(1),
                                 requires_grad=bias_learnable)

    def forward(self, x):
        """Return ``scale * relu(x) ** 2 + bias`` for the input tensor ``x``."""
        return self.scale * self.relu(x) ** 2 + self.bias
class KernelSpatialModulation_Global(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduction=0.0625, kernel_num=4, min_channel=16,
             temp=1.0, kernel_temp=None, kernel_att_init='dyconv_as_extra', att_multi=2.0,
             ksm_only_kernel_att=False, att_grid=1, stride=1, spatial_freq_decompose=False,
             act_type='sigmoid'):
    """Build the shared attention trunk and the per-branch attention heads.

    The trunk is ``avgpool -> fc (1x1 conv) -> bn -> relu (StarReLU)``. Each of
    the four branches (channel, filter, spatial, kernel) is either a 1x1 conv
    head on top of the trunk or ``self.skip`` when the branch is disabled.

    Args:
        in_planes: Input channels of ``fc``; also sizes the channel head.
        out_planes: Sizes the filter head.
        kernel_size: The spatial head emits ``kernel_size * kernel_size``
            channels; a value of 1 disables the spatial branch.
        groups: Only used to detect the depth-wise case
            (``in_planes == groups == out_planes``), which disables the
            filter branch.
        reduction: Ratio used to derive the trunk width from ``in_planes``.
        kernel_num: Output channels of the kernel head; a value of 1
            disables the kernel branch.
        min_channel: Lower bound on the trunk width.
        temp: Stored as ``self.temperature``.
        kernel_temp: Stored as ``self.kernel_temp``.
        kernel_att_init: Stored as ``self.kernel_att_init``.
        att_multi: Stored as ``self.att_multi``.
        ksm_only_kernel_att: When true, the channel, filter and spatial
            branches are all replaced by ``self.skip``.
        att_grid: Stored as ``self.att_grid``.
        stride: Stride of the filter head's 1x1 conv.
        spatial_freq_decompose: When true, the filter head emits
            ``2 * out_planes`` channels and the channel head emits
            ``2 * in_planes`` channels (only if ``kernel_size > 1``).
        act_type: Stored as ``self.act_type``.
    """
    super(KernelSpatialModulation_Global, self).__init__()
    attention_channel = max(int(in_planes * reduction), min_channel)

    self.act_type = act_type
    self.kernel_size = kernel_size
    self.kernel_num = kernel_num
    self.temperature = temp
    self.kernel_temp = kernel_temp
    self.ksm_only_kernel_att = ksm_only_kernel_att
    self.kernel_att_init = kernel_att_init
    self.att_multi = att_multi

    # Shared trunk.
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.att_grid = att_grid
    self.fc = nn.Conv2d(in_planes, attention_channel, 1, bias=False)
    self.bn = nn.BatchNorm2d(attention_channel)
    self.relu = StarReLU()
    self.spatial_freq_decompose = spatial_freq_decompose

    # Channel branch.
    if ksm_only_kernel_att:
        self.func_channel = self.skip
    else:
        if spatial_freq_decompose:
            self.channel_fc = nn.Conv2d(attention_channel, in_planes * 2 if self.kernel_size > 1 else in_planes, 1, bias=True)
        else:
            self.channel_fc = nn.Conv2d(attention_channel, in_planes, 1, bias=True)
        self.func_channel = self.get_channel_attention

    # Filter branch.
    if (in_planes == groups and in_planes == out_planes) or self.ksm_only_kernel_att:  # depth-wise convolution
        self.func_filter = self.skip
    else:
        if spatial_freq_decompose:
            self.filter_fc = nn.Conv2d(attention_channel, out_planes * 2, 1, stride=stride, bias=True)
        else:
            self.filter_fc = nn.Conv2d(attention_channel, out_planes, 1, stride=stride, bias=True)
        self.func_filter = self.get_filter_attention

    # Spatial branch.
    if kernel_size == 1 or self.ksm_only_kernel_att:  # point-wise convolution
        self.func_spatial = self.skip
    else:
        self.spatial_fc = nn.Conv2d(attention_channel, kernel_size * kernel_size, 1, bias=True)
        self.func_spatial = self.get_spatial_attention

    # Kernel branch.
    if kernel_num == 1:
        self.func_kernel = self.skip
    else:
        self.kernel_fc = nn.Conv2d(attention_channel, kernel_num, 1, bias=True)
        self.func_kernel = self.get_kernel_attention

    self._initialize_weights()
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
if isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
if hasattr(self, 'channel_spatial'):
nn.init.normal_(self.channel_spatial.conv.weight, std=1e-6)
if hasattr(self, 'filter_spatial'):
nn.init.normal_(self.filter_spatial.conv.weight, std=1e-6)
if hasattr(self, 'spatial_fc') and isinstance(self.spatial_fc, nn.Conv2d):
# nn.init.constant_(self.spatial_fc.weight, 0)
nn.init.normal_(self.spatial_fc.weight, std=1e-6)
# self.spatial_fc.weight *= 1e-6
if self.kernel_att_init == 'dyconv_as_extra':
pass
else:
# nn.init.constant_(self.spatial_fc.weight, 0)
# nn.init.constant_(self.spatial_fc.bias, 0)
pass
if hasattr(self, 'func_filter') and isinstance(self.func_filter, nn.Conv2d):
# nn.init.constant_(self.func_filter.weight, 0)
nn.init.normal_(self.func_filter.weight, std=1e-6)
# self.func_filter.weight *= 1e-6
if self.kernel_att_init == 'dyconv_as_extra':
pass
else:
# nn.init.constant_(self.func_filter.weight, 0)
# nn.init.constant_(self.func_filter.bias, 0)
pass
if hasattr(self, 'kernel_fc') and isinstance(self.kernel_fc, nn.Conv2d):
# nn.init.constant_(self.kernel_fc.weight, 0)
nn.init.normal_(self.kernel_fc.weight, std=1e-6)
if self.kernel_att_init == 'dyconv_as_extra':
pass
# nn.init.constant_(self.kernel_fc.weight, 0)
# nn.init.constant_(self.kernel_fc.bias, -10)
# nn.init.constant_(self.kernel_fc.weight[0], 6)
# nn.init.constant_(self.kernel_fc.weight[1:], -6)
else:
# nn.init.constant_(self.kernel_fc.weight, 0)
# nn.init.constant_(self.kernel_fc.bias, 0)
# nn.init.constant_(self.kernel_fc.bias, -10)
# nn.init.constant_(self.kernel_fc.bias[0], 10)
pass
if hasattr(self, 'channel_fc') and isinstance(self.channel_fc, nn.Conv2d):
# nn.init.constant_(self.channel_fc.weight, 0)
nn.init.normal_(self.channel_fc.weight, std=1e-6)
# nn.init.constant_(self.channel_fc.bias[1], 6)
# nn.init.constant_(self.channel_fc.bias, 0)
if self.kernel_att_init == 'dyconv_as_extra':
pass
else:
# nn.init.constant_(self.channel_fc.weight, 0)
# nn.init.constant_(self.channel_fc.bias, 0)
pass
def update_temperature(self, temperature):
self.temperature = temperature
@staticmethod
def skip(_):
return 1.0
def get_channel_attention(self, x):
if self.act_type =='sigmoid':
channel_attention = torch.sigmoid(self.channel_fc(x).view(x.size(0), 1, 1, -1, x.size(-2), x.size(-1)) / self.temperature) * self.att_multi # b, kn, cout, cin, k, k
elif self.act_type =='tanh':
channel_attention = 1 + torch.tanh_(self.channel_fc(x).view(x.size(0), 1, 1, -1, x.size(-2), x.size(-1)) / self.temperature) # b, kn, cout, cin, k, k
else:

最低0.47元/天 解锁文章
321

被折叠的 条评论
为什么被折叠?



