# CBAM Attention Mechanism

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Conv2d + optional BatchNorm + optional ReLU helper block
class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

# Channel attention: globally pooled descriptors (avg/max by default) are passed
# through a shared MLP, summed, and turned into per-channel weights by a sigmoid.
class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
            )
        self.pool_types = pool_types
    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type=='avg':
                avg_pool = F.avg_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( avg_pool )
            elif pool_type=='max':
                max_pool = F.max_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( max_pool )
            elif pool_type=='lp':
                lp_pool = F.lp_pool2d( x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( lp_pool )
            elif pool_type=='lse':
                # LSE pool only
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp( lse_pool )

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)  # (B, C, 1, 1) expanded to the input shape
        return x * scale

# Numerically stable log-sum-exp over the spatial dimensions (used by the 'lse' pooling option)
def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs

# Stack the channel-wise max map and mean map into a 2-channel descriptor
class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )

# Spatial attention: a 7x7 convolution over the 2-channel pooled map yields a per-pixel weight
class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False)
    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)  # (B, 1, H, W) spatial weight map, broadcast over channels
        return x * scale

# CBAM: channel attention followed by (optional) spatial attention, applied sequentially
class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial=no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()
    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out
# Quick demo on a random input of shape (batch=4, channels=64, height=32, width=32)
inp = np.random.randint(1, 10, (4, 64, 32, 32))
batch, channels, height, width = inp.shape
cbam = CBAM(channels)

inputs = torch.tensor(inp, dtype=torch.float32)
print(cbam(inputs), inputs)
```
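As a quicker sanity check than printing the full tensors, here is a minimal sketch (reusing the `CBAM` module defined above) confirming that the block only rescales activations and never changes the shape of its input:

```python
x = torch.randn(4, 64, 32, 32)   # (batch, channels, height, width)
attn = CBAM(gate_channels=64)
out = attn(x)
assert out.shape == x.shape      # attention reweights features; the shape is preserved
print(out.shape)                 # torch.Size([4, 64, 32, 32])
```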


### How the SE attention mechanism and CBAM work

#### SE attention mechanism

The SE (Squeeze-and-Excitation) module strengthens a network's representational power through two main operations: squeeze and excitation [^1]. Given a feature map, the SE module first squeezes it with global pooling into a per-channel descriptor, then builds per-channel importance weights with fully connected layers, and finally maps them into the 0-1 range with a sigmoid so that each channel receives an attention score.

```python
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)
```

#### How CBAM works

The Convolutional Block Attention Module (CBAM) applies attention along both the channel and the spatial dimension. For an input feature map, CBAM first derives a channel attention vector from globally pooled descriptors and multiplies it back into the features, then computes a spatial attention map over the refined features and multiplies that back in as well, so the response of every channel and every position is adaptively rescaled (channel first, then spatial, matching the implementation above).

```python
class BasicConv(nn.Module):
    ...

class ChannelGate(nn.Module):
    ...

class SpatialGate(nn.Module):
    ...

class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        ...

    def forward(self, x):
        x_out = self.channel_gate(x)
        x_out = self.spatial_gate(x_out)
        return x + x_out  # residual connection when CBAM is embedded inside a block
```

### Differences in how they work

- **Dimensions being modeled**
  - SENet only models relationships between channels and ignores pixel-level spatial structure.
  - CBAM models channel interactions and additionally captures spatial dependencies within each feature map.
- **Parameter count** (see the sketch after the next section)
  - Because CBAM adds a spatial attention branch (a 7x7 convolution over the pooled 2-channel map) on top of the channel MLP, it is usually somewhat heavier than a plain SE block.

### When to use which

For problems such as object recognition or fine-grained classification where the dataset is small and classes overlap heavily, the lighter SENet is often the first choice: it adds fewer parameters, which reduces the risk of overfitting while still improving generalization. For tasks with heavy background clutter, such as semantic segmentation or pose estimation, the model needs a larger effective receptive field to understand contextual variation, and the more comprehensive CBAM is usually the better choice.
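To make the parameter comparison above concrete, a rough sketch (assuming the `SELayer` and `CBAM` classes defined earlier in this post) is to simply count trainable parameters for a 64-channel feature map:

```python
def count_params(module):
    # total number of trainable parameters in a module
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

print("SELayer(64):", count_params(SELayer(64)))
print("CBAM(64):   ", count_params(CBAM(64)))
# The exact numbers depend on the reduction ratio, but CBAM comes out slightly
# larger because of the extra 7x7 convolution in its spatial-attention branch.
```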