(Super Detailed) YOLOv7 Improvement: Adding SAConv

1. Add the following code at the end of common.py
(The stock common.py in the YOLOv7 repo already imports torch and torch.nn as nn and defines autopad, so the code below needs no additional imports.)

class ConvAWS2d(nn.Conv2d):
    """Conv2d with Adaptive Weight Standardization (AWS), the base of SAC in DetectoRS."""
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1))
        self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1))

    def _get_weight(self, weight):
        # standardize the raw weight (zero mean / unit std per output channel),
        # then rescale and shift with the weight_gamma / weight_beta buffers
        weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2,
                                  keepdim=True).mean(dim=3, keepdim=True)
        weight = weight - weight_mean
        std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        weight = weight / std
        weight = self.weight_gamma * weight + self.weight_beta
        return weight

    def forward(self, x):
        # convolve with the standardized weight; the bias is applied as usual
        weight = self._get_weight(self.weight)
        return super()._conv_forward(x, weight, self.bias)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Sentinel trick: fill gamma with -1 first; a checkpoint that already
        # carries weight_gamma overwrites it, so the mean check below detects
        # whether gamma/beta must instead be derived from the pretrained weights.
        self.weight_gamma.data.fill_(-1)
        super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                      missing_keys, unexpected_keys, error_msgs)
        if self.weight_gamma.data.mean() > 0:
            return
        weight = self.weight.data
        weight_mean = weight.data.mean(dim=1, keepdim=True).mean(dim=2,
                                       keepdim=True).mean(dim=3, keepdim=True)
        self.weight_beta.data.copy_(weight_mean)
        std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        self.weight_gamma.data.copy_(std)
    
class SAConv2d(ConvAWS2d):
    """Switchable Atrous Convolution (SAC, from DetectoRS), with a BN + activation
    tail so it can replace YOLOv7's Conv block directly."""
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 s=1,
                 p=None,
                 g=1,
                 d=1,
                 act=True,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=s,
            padding=autopad(kernel_size, p),
            dilation=d,
            groups=g,
            bias=bias)
        self.switch = torch.nn.Conv2d(
            self.in_channels,
            1,
            kernel_size=1,
            stride=s,
            bias=True)
        self.switch.weight.data.fill_(0)
        self.switch.bias.data.fill_(1)
        self.weight_diff = torch.nn.Parameter(torch.Tensor(self.weight.size()))
        self.weight_diff.data.zero_()
        self.pre_context = torch.nn.Conv2d(
            self.in_channels,
            self.in_channels,
            kernel_size=1,
            bias=True)
        self.pre_context.weight.data.fill_(0)
        self.pre_context.bias.data.fill_(0)
        self.post_context = torch.nn.Conv2d(
            self.out_channels,
            self.out_channels,
            kernel_size=1,
            bias=True)
        self.post_context.weight.data.fill_(0)
        self.post_context.bias.data.fill_(0)
        
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        # pre-context
        avg_x = torch.nn.functional.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch: a 5x5 local average (reflect-padded) feeds a 1x1 conv that
        # produces the per-pixel gate between the two branches
        avg_x = torch.nn.functional.pad(x, pad=(2, 2, 2, 2), mode="reflect")
        avg_x = torch.nn.functional.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac: both branches share one standardized weight
        weight = self._get_weight(self.weight)
        out_s = super()._conv_forward(x, weight, self.bias)  # small receptive field (original dilation)
        ori_p = self.padding
        ori_d = self.dilation
        self.padding = tuple(3 * p for p in self.padding)    # tripled padding keeps the output size
        self.dilation = tuple(3 * d for d in self.dilation)  # atrous rate x3
        weight = weight + self.weight_diff                   # large branch adds a learned weight offset
        out_l = super()._conv_forward(x, weight, self.bias)  # large receptive field
        out = switch * out_s + (1 - switch) * out_l          # per-pixel soft blend
        self.padding = ori_p
        self.dilation = ori_d
        # post-context
        avg_x = torch.nn.functional.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return self.act(self.bn(out))
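
Before wiring the block into the network, a quick shape check is worthwhile. A minimal sketch, run from the repo root and assuming the class above was added to models/common.py:

```python
import torch
from models.common import SAConv2d  # assumes the snippet above was added there

x = torch.randn(1, 64, 32, 32)
m = SAConv2d(64, 128, 3, s=2)   # 3x3 SAC block, stride 2
print(m(x).shape)               # expected: torch.Size([1, 128, 16, 16])
```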

2. Modify yolo.py
Register SAConv2d in parse_model so the yaml parser recognizes the new module, as sketched below.
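A minimal sketch of that edit, assuming the stock WongKinYiu/yolov7 models/yolo.py: the real `if m in [...]` list inside parse_model is much longer; keep it intact and simply append SAConv2d.

```python
# models/yolo.py, inside parse_model(): append SAConv2d to the module list
# so its in/out channels are resolved like any other Conv-style block
if m in [Conv, DWConv, GhostConv, RepConv, SPPCSPC, SAConv2d]:  # list shortened here
    c1, c2 = ch[f], args[0]
    if c2 != no:  # not an output layer
        c2 = make_divisible(c2 * gw, 8)
    args = [c1, c2, *args[1:]]
```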
3. Modify yolov7.yaml
Replace the Conv entries you want to upgrade with SAConv2d; see the example excerpt below.
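For example, a hypothetical excerpt of cfg/training/yolov7.yaml with the second backbone Conv swapped out (which layers to replace is a design choice; the args stay [out_channels, kernel, stride]):

```yaml
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [32, 3, 1]],        # 0
   [-1, 1, SAConv2d, [64, 3, 2]],    # 1-P1/2, was: Conv
   [-1, 1, Conv, [64, 3, 1]],
   # ... remaining layers unchanged
  ]
```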
4. Run training
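A typical launch, assuming the stock train.py (paths, batch size, and epochs here are illustrative):

```bash
python train.py --cfg cfg/training/yolov7.yaml --data data/coco.yaml \
                --weights '' --batch-size 16 --epochs 300
```

If the printed model summary lists SAConv2d layers and the first epoch starts, the integration is working.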

### YOLOv7 Improvement Methods and Performance Optimization

YOLOv7 is an efficient object-detection framework that has been improved along several axes to raise both performance and functionality. The main directions and how to pursue them:

#### 1. Architecture optimization

YOLOv7 introduces a new architecture design that performs especially well on private datasets [^1]. The new architecture improves both speed and detection accuracy: more effective feature-extraction mechanisms and multi-scale fusion help the model capture objects of different sizes.

#### 2. Data-augmentation strategies

Model performance can be pushed further with advanced augmentation, including but not limited to Mosaic and CutMix. Mosaic stitches four images into one training sample, increasing scene diversity; CutMix mixes regions of two images to generate new training examples [^3].

#### 3. Efficient vision Transformers (EfficientViT)

Borrowing the design ideas of EfficientViT, lightweight vision-Transformer components can be folded into YOLOv7. This combines the strengths of conventional CNNs with the expressive power of modern Transformers, yielding lighter model structures that still keep high accuracy [^2].

#### 4. Custom loss-function adjustments

For specific application scenarios it may pay to redesign or fine-tune the loss. For instance, under severe class imbalance, re-weighting the cross-entropy terms in the classification branch can mitigate the problem.

#### Example code: a custom loss function

The snippet below shows a simple PyTorch implementation of binary cross-entropy with separate positive/negative weighting factors.

```python
import torch
import torch.nn as nn

class WeightedBCELoss(nn.Module):
    def __init__(self, pos_weight=1., neg_weight=1.):
        super().__init__()
        self.pos_weight = pos_weight
        self.neg_weight = neg_weight

    def forward(self, pred, target):
        # pred: probabilities in (0, 1); clamp avoids log(0)
        loss = -(target * self.pos_weight * torch.log(pred.clamp(min=1e-8))
                 + (1 - target) * self.neg_weight * torch.log((1 - pred).clamp(min=1e-8)))
        return loss.mean()
```

### Conclusion

Taken together, efforts on all of the above fronts, from the base architecture up to higher-level algorithmic techniques, can lift YOLOv7's effectiveness in real projects to a new level.
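A quick usage sketch (the weights and shapes are illustrative; pred is assumed to already be a probability, e.g. a sigmoid output):

```python
import torch

criterion = WeightedBCELoss(pos_weight=2.0, neg_weight=1.0)  # up-weight the rarer positive class
pred = torch.sigmoid(torch.randn(8, 1))        # (batch, 1) probabilities
target = torch.randint(0, 2, (8, 1)).float()   # binary labels
print(criterion(pred, target))                 # scalar mean loss
```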