怎么在YOLOv8中将CBAM注意力模块和Repulsion Loss损失函数这两个模块融合起来?以下分别是我的CBAM模块、CBAM的yaml、Repulsion Loss模块、Repulsion Loss的yaml以及我的训练脚本。
CBAM模块:
import numpy as np
import torch
from torch import nn
from torch.nn import init
class ChannelAttentionModule(nn.Module):
    """Channel-attention branch of CBAM.

    Global average- and max-pooled channel descriptors are passed through one
    shared two-layer MLP, summed, and squashed with a sigmoid to give one
    weight per channel.

    Args:
        c1: Number of input channels.
        reduction: Bottleneck ratio of the shared MLP; the hidden width is
            ``c1 // reduction`` but never less than 1.
    """

    def __init__(self, c1, reduction=16):
        super().__init__()
        # ``c1 // reduction`` is 0 whenever c1 < reduction, which would build a
        # zero-width bottleneck that ignores its input; keep at least one unit.
        mid_channel = max(1, c1 // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.shared_MLP = nn.Sequential(
            nn.Linear(in_features=c1, out_features=mid_channel),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Linear(in_features=mid_channel, out_features=c1),
        )
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Return channel weights of shape (batch, c1, 1, 1) for input ``x``."""
        batch = x.size(0)
        # Flatten the (batch, c1, 1, 1) pooled maps to (batch, c1) for the MLP.
        avgout = self.shared_MLP(self.avg_pool(x).view(batch, -1))
        maxout = self.shared_MLP(self.max_pool(x).view(batch, -1))
        # Restore the two singleton spatial dims so the result broadcasts over x.
        return self.act(avgout + maxout)[:, :, None, None]
class SpatialAttentionModule(nn.Module):
    """Spatial-attention branch of CBAM.

    The per-pixel mean and max over the channel axis are stacked into a
    two-channel map, filtered by a 7x7 convolution, and passed through a
    sigmoid, giving one weight per spatial location.
    """

    def __init__(self):
        super().__init__()
        # padding=3 with a 7x7 kernel and stride 1 keeps the spatial size.
        self.conv2d = nn.Conv2d(2, 1, kernel_size=7, stride=1, padding=3)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Return spatial weights of shape (batch, 1, H, W) for input ``x``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        # Channel order (mean first, max second) must match the conv weights.
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        return self.act(self.conv2d(stacked))
class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel then spatial attention.

    The output has the same shape as the input.

    Args:
        c1: Number of input channels.
        c2: Unused by this module; accepted (and now optional) so existing
            ``CBAM(c1, c2)`` call sites keep working.
            NOTE(review): presumably supplied by the model parser — confirm
            against the ``parse_model`` registration for this class.
    """

    def __init__(self, c1, c2=None):
        super().__init__()
        self.channel_attention = ChannelAttentionModule(c1)
        self.spatial_attention = SpatialAttentionModule()

    def forward(self, x):
        """Re-weight ``x`` per channel, then per spatial location."""
        out = self.channel_attention(x) * x
        # Spatial weights are computed from the channel-refined features.
        return self.spatial_attention(out) * out
CBAM的yaml:
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 2 # number of classes
loss: 'RepulsionLoss' # 关键修改:指定使用Repulsion Loss 2025/7/19改
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]] # 2
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]] # 4
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]] # 6
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]] # 8
- [-1, 3, CBAM, [1024]] # 9 (inserted before SPPF; every later layer index is one higher than in stock yolov8.yaml)
- [-1, 1, SPPF, [1024, 5]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 11
- [[-1, 6], 1, Concat, [1]] # 12 cat backbone P4
- [-1, 3, C2f, [512]] # 13
- [-1, 1, nn.Upsample, [None, 2, 'nearest']] # 14
- [[-1, 4], 1, Concat, [1]] # 15 cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]] # 17
- [[-1, 13], 1, Concat, [1]] # 18 cat head P4 (layer 13)
- [-1, 3, C2f, [512]] # 19 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]] # 20
- [[-1, 10], 1, Concat, [1]] # 21 cat head P5 (SPPF output, layer 10)
- [-1, 3, C2f, [1024]] # 22 (P5/32-large)
- [[16, 19, 22], 1, Detect, [nc]] # 23 Detect(P3, P4, P5)
Repulsionloss模块:
import torch
import numpy as np
def pairwise_bbox_iou(box1, box2, box_format='xywh'):
    """Return the IoU between every box in ``box1`` and every box in ``box2``.

    Args:
        box1: Tensor of shape (N, 4).
        box2: Tensor of shape (M, 4).
        box_format: ``'xyxy'`` for corner boxes (x1, y1, x2, y2) or ``'xywh'``
            for centre/size boxes (cx, cy, w, h).

    Returns:
        Tensor of shape (N, M); entry (i, j) is the IoU of ``box1[i]`` and
        ``box2[j]``. Pairs whose union area is zero yield 0 rather than NaN.

    Raises:
        ValueError: If ``box_format`` is neither ``'xyxy'`` nor ``'xywh'``.
    """
    if box_format == 'xyxy':
        lt = torch.max(box1[:, None, :2], box2[:, :2])
        rb = torch.min(box1[:, None, 2:], box2[:, 2:])
        area_1 = torch.prod(box1[:, 2:] - box1[:, :2], 1)
        area_2 = torch.prod(box2[:, 2:] - box2[:, :2], 1)
    elif box_format == 'xywh':
        half_1 = box1[:, None, 2:] / 2
        half_2 = box2[:, 2:] / 2
        lt = torch.max(box1[:, None, :2] - half_1, box2[:, :2] - half_2)
        rb = torch.min(box1[:, None, :2] + half_1, box2[:, :2] + half_2)
        area_1 = torch.prod(box1[:, 2:], 1)
        area_2 = torch.prod(box2[:, 2:], 1)
    else:
        # Previously fell through and failed later with an UnboundLocalError.
        raise ValueError(
            f"box_format must be 'xyxy' or 'xywh', got {box_format!r}"
        )

    # A pair overlaps only if the intersection is non-empty on both axes.
    valid = (lt < rb).all(dim=2).to(lt.dtype)
    inter = torch.prod(rb - lt, 2) * valid
    union = area_1[:, None] + area_2 - inter
    # Degenerate (zero-area) pairs have union == 0 and inter == 0; swap the
    # denominator for 1 there so the result is 0 instead of 0/0 = NaN.
    safe_union = torch.where(union == 0, torch.ones_like(union), union)
    return inter / safe_union
def IoG(gt_box, pred_box):
    """Return the intersection-over-ground-truth of row-aligned box pairs.

    Both arguments hold corner boxes (x1, y1, x2, y2) in their first four
    columns; row ``i`` of ``gt_box`` is compared with row ``i`` of
    ``pred_box``. The result is intersection area divided by the ground-truth
    area, with that area floored at 1e-6 to avoid dividing by zero.
    """
    top_left = torch.max(gt_box[:, :2], pred_box[:, :2])
    bottom_right = torch.min(gt_box[:, 2:4], pred_box[:, 2:4])
    # Negative extents mean the boxes do not overlap on that axis.
    overlap_wh = torch.clamp(bottom_right - top_left, min=0)
    intersection = overlap_wh[:, 0] * overlap_wh[:, 1]

    gt_wh = gt_box[:, 2:4] - gt_box[:, :2]
    gt_area = (gt_wh[:, 0] * gt_wh[:, 1]).clamp(1e-6)
    return intersection / gt_area
def smooth_ln(x, sigma=0.5):
    """Smoothed ``-ln(1 - x)`` penalty used by the repulsion terms.

    For ``x <= sigma`` the value is ``-ln(1 - x)``; above ``sigma`` it
    continues linearly as ``(x - sigma) / (1 - sigma) - ln(1 - sigma)``.

    Args:
        x: Tensor of overlap values.
        sigma: Switch point between the logarithmic and the linear branch.

    Returns:
        Tensor with the same shape as ``x``.
    """
    # torch.where evaluates both branches, and autograd back-propagates through
    # the unselected one with a zero upstream gradient. At x == 1 the log
    # branch has an infinite derivative, and 0 * inf = NaN poisons the
    # gradient. Clamping the log input to sigma leaves every selected value
    # unchanged (they already satisfy x <= sigma) and keeps the rest finite.
    log_branch = -torch.log(1 - x.clamp(max=sigma))
    linear_branch = ((x - sigma) / (1 - sigma)) - np.log(1 - sigma)
    return torch.where(torch.le(x, sigma), log_branch, linear_branch)
def repulsion_loss(pbox, gtbox, fg_mask, sigma_repgt=0.9, sigma_repbox=0, pnms=0, gtnms=0):
    """Compute the RepGT and RepBox repulsion terms for a batch.

    Args:
        pbox: Predicted boxes, shape (batch, anchors, 4), corner format
            (x1, y1, x2, y2).
        gtbox: Ground-truth box assigned to each anchor, same shape and format
            as ``pbox``.
        fg_mask: Foreground mask of shape (batch, anchors); only anchors where
            it is true take part in the loss.
        sigma_repgt: ``sigma`` passed to ``smooth_ln`` for the RepGT term.
        sigma_repbox: ``sigma`` passed to ``smooth_ln`` for the RepBox term.
        pnms: A RepBox term is added for an image only if some masked
            prediction/prediction IoU exceeds this value.
        gtnms: A prediction contributes to RepGT only if its best IoU with a
            ground truth other than its own exceeds this value.

    Returns:
        Tuple ``(loss_repgt, loss_repbox)`` of 0-d tensors on ``pbox.device``,
        each averaged over the images that have at least one foreground
        anchor. Both are 0 when no image has any.
    """
    device = pbox.device
    loss_repgt = torch.zeros(1, device=device)
    loss_repbox = torch.zeros(1, device=device)

    # NOTE(review): detaching the predictions (as the original code did) means
    # the returned values carry no gradient with respect to pbox, so adding
    # them to the training loss cannot influence the weights. Kept to preserve
    # existing behaviour — confirm whether that is intended.
    pbox = pbox.detach()
    gtbox = gtbox.detach()

    num_images = 0  # images with at least one foreground anchor
    for img_pbox, img_gtbox, img_mask in zip(pbox, gtbox, fg_mask):
        img_mask = img_mask.bool()
        if not bool(img_mask.any()):
            continue
        num_images += 1

        pos_pbox = img_pbox[img_mask]    # (n, 4)
        pos_gtbox = img_gtbox[img_mask]  # (n, 4); row i is the target of pos_pbox[i]
        pgiou = pairwise_bbox_iou(pos_pbox, pos_gtbox, box_format='xyxy')
        ppiou = pairwise_bbox_iou(pos_pbox, pos_pbox, box_format='xyxy')

        # Vectorised form of the former nested Python loop over NumPy copies.
        # It runs on the tensors' own device, so it also works on CPU and on
        # GPUs other than the default one.
        same_gt = (pos_gtbox[:, None, :] == pos_gtbox[None, :, :]).all(dim=-1)
        order = torch.arange(pos_pbox.shape[0], device=device)
        on_or_above_diag = order[:, None] <= order[None, :]
        # Ignore overlap with a prediction's own ground truth ...
        pgiou = pgiou.masked_fill(same_gt, 0)
        # ... and count each prediction pair once, skipping pairs that share a
        # ground truth.
        ppiou = ppiou.masked_fill(same_gt | on_or_above_diag, 0)

        # RepGT: penalise overlap with the best-matching *other* ground truth.
        max_iou, argmax_iou = torch.max(pgiou, 1)
        pg_mask = torch.gt(max_iou, gtnms)
        if bool(pg_mask.any()):
            pbox_sec = pos_pbox[pg_mask]
            gtbox_sec = pos_gtbox[argmax_iou[pg_mask]]
            loss_repgt = loss_repgt + smooth_ln(IoG(gtbox_sec, pbox_sec), sigma_repgt).mean()

        # RepBox: penalise overlap between predictions of different targets.
        if bool(torch.gt(ppiou, pnms).any()):
            loss_repbox = loss_repbox + smooth_ln(ppiou, sigma_repbox).mean()

    # Dividing by zero images would turn both terms into NaN.
    if num_images > 0:
        loss_repgt = loss_repgt / num_images
        loss_repbox = loss_repbox / num_images
    return loss_repgt.squeeze(0), loss_repbox.squeeze(0)


# Repulsion Loss YAML config:
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model. More improvement points for YOLOv8, please see https://github.com/iscyy/ultralyticsPro
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
loss: 'RepulsionLoss' # 举例,如果使用 RepulsionLoss 损失函数的话, 即修改对应的名称
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 18 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 21 (P5/32-large)
- [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
训练脚本.py:
import warnings
import envs
warnings.filterwarnings('ignore')
from ultralytics import YOLO
if __name__ == '__main__':
    # Build the model from the CBAM yaml, then load pretrained weights into it.
    model = YOLO(r'F:\Anaconda\anaconda\envs\yolov8_pytorch\yolov8_CBAM.yaml').load(r'F:\Anaconda\anaconda\envs\yolov8_pytorch\测试\yolov8n.pt')
    model.train(
        data=r'F:\Anaconda\anaconda\envs\yolov8_pytorch\xunlian2\data.yaml',
        device="cuda",  # train on the GPU
        epochs=200,  # number of training epochs
        batch=16,  # adjust to the available GPU memory
        imgsz=640,  # input image size
        workers=4,  # number of data-loading workers
        optimizer="auto",  # let Ultralytics choose the optimizer
        lr0=0.01,  # initial learning rate
        name='yolov8_cbam_exp6',  # experiment name (optional)
    )
    # Evaluate on the validation set once training has finished.
    metrics = model.val()
    # ``box.map`` is mAP@0.5:0.95; the mAP@0.5 figure is ``box.map50``.
    print(f"mAP@0.5: {metrics.box.map50}")
    print(f"mAP@0.5:0.95: {metrics.box.map}")
    print('模型训练完毕')
最新发布