1,本文介绍
这篇文章的改进机制是利用新推出的 渐近特征金字塔网络(AFPN)来优化yolov8的检测头, AFPN的核心 是引入一种渐近的特征融合策略,将底层和高层的特征逐渐整合到目标检测过程中。这种方式有助于减小不同层次特征之间的语义差距,提高特征融合效果,使得检测模型能更好地适应不同层次的语义信息。
关于AFPN的详细介绍可以看论文:https://arxiv.org/pdf/2306.15988.pdf
本文将讲解如何将AFPN融合进yolov8,以提高小目标检测的性能。
话不多说,上代码!
2,将AFPN融入YOLOv8
2.1 步骤一
首先找到如下的目录'ultralytics/nn/modules',然后在这个目录下创建一个afpn.py文件,文件名字可以根据你自己的习惯起,然后将afpn的核心代码复制进去。
# AFPN 核心代码
import math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
from ultralytics.nn.modules import DFL
from ultralytics.nn.modules.conv import Conv
from ultralytics.utils.tal import dist2bbox, make_anchors
__all__ =['Detect_AFPN']
def BasicConv(filter_in, filter_out, kernel_size, stride=1, pad=None):
if not pad:
pad = (kernel_size - 1) // 2 if kernel_size else 0
else:
pad = pad
return nn.Sequential(OrderedDict([
("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)),
("bn", nn.BatchNorm2d(filter_out)),
("relu", nn.ReLU(inplace=True)),
]))
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, filter_in, filter_out):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(filter_in, filter_out, 3, padding=1)
self.bn1 = nn.BatchNorm2d(filter_out, momentum=0.1)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(filter_out, filter_out, 3, padding=1)
self.bn2 = nn.BatchNorm2d(filter_out, momentum=0.1)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Upsample(nn.Module):
def __init__(self, in_channels, out_channels, scale_factor=2):
super(Upsample, self).__init__()
self.upsample = nn.Sequential(
BasicConv(in_channels, out_channels, 1),
nn.Upsample(scale_factor=scale_factor, mode='bilinear')
)
def forward(self, x):
x = self.upsample(x)
return x
class Downsample_x2(nn.Module):
def __init__(self, in_channels, out_channels):
super(Downsample_x2, self).__init__()
self.downsample = nn.Sequential(
BasicConv(in_channels, out_channels, 2, 2, 0)
)
def forward(self, x, ):
x = self.downsample(x)
return x
class Downsample_x4(nn.Module):
def __init__(self, in_channels, out_channels):
super(Downsample_x4, self).__init__()
self.downsample = nn.Sequential(
BasicConv(in_channels, out_channels, 4, 4, 0)
)
def forward(self, x, ):
x = self.downsample(x)
return x
class Downsample_x8(nn.Module):
def __init__(self, in_channels, out_channels):
super(Downsample_x8, self).__init__()
self.downsample = nn.Sequential(
BasicConv(in_channels, out_channels, 8, 8, 0)
)
def forward(self, x, ):
x = self.downsample(x)
return x
class ASFF_2(nn.Module):
def __init__(self, inter_dim=512):
super(ASFF_2, self).__init__()
self.inter_dim = inter_dim
compress_c = 8
self.weight_level_1 = BasicConv(self.inter_dim, compress_c, 1, 1)
self.weight_level_2 = BasicConv(self.inter_dim, compress_c, 1, 1)
self.weight_levels = nn.Conv2d(compress_c * 2, 2, kernel_size=1, stride=1, padding=0)
self.conv = BasicConv(self.inter_dim, self.inter_dim, 3, 1)
def forward(self, input1, input2):
level_1_weight_v = self.weight_level_1(input1)
level_2_weight_v = self.weight_level_2(input2)
levels_weight_v = torch.cat((level_1_weight_v, level_2_weight_v), 1)
levels_weight = self.weight_levels(levels_weight_v)
levels_weight = F.softmax(levels_weight, dim=1)
fused_out_reduced = input1 * levels_weight[:, 0:1, :, :] + \
input2 * levels_weight[:, 1:2