CBAM

```python
import keras.backend as K
import keras.layers as KL

# Determine the input data format: channels_first or channels_last
channel_axis = 1 if K.image_data_format() == "channels_first" else 3

# Channel Attention Module (CAM)
def channel_attention(input_xs, reduction_ratio=0.125):
    # get channel
    channel = int(input_xs.shape[channel_axis])
    maxpool_channel = KL.GlobalMaxPooling2D()(input_xs)
    maxpool_channel = KL.Reshape((1, 1, channel))(maxpool_channel)
    avgpool_channel = KL.GlobalAvgPool2D()(input_xs)
    avgpool_channel = KL.Reshape((1, 1, channel))(avgpool_channel)
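    # Shared MLP: the same two Dense layers are applied to both the max- and avg-pooled descriptors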
    Dense_One = KL.Dense(units=int(channel * reduction_ratio), activation='relu', kernel_initializer='he_normal', use_bias=True, bias_initializer='zeros')
    Dense_Two = KL.Dense(units=int(channel), activation='relu', kernel_initializer='he_normal', use_bias=True, bias_initializer='zeros')
    # max path
    mlp_1_max = Dense_One(maxpool_channel)
    mlp_2_max = Dense_Two(mlp_1_max)
    mlp_2_max = KL.Reshape(target_shape=(1, 1, int(channel)))(mlp_2_max)
    # avg path
    mlp_1_avg = Dense_One(avgpool_channel)
    mlp_2_avg = Dense_Two(mlp_1_avg)
    mlp_2_avg = KL.Reshape(target_shape=(1, 1, int(channel)))(mlp_2_avg)
    channel_attention_feature = KL.Add()([mlp_2_max, mlp_2_avg])
    channel_attention_feature = KL.Activation('sigmoid')(channel_attention_feature)
    return KL.Multiply()([channel_attention_feature, input_xs])

# Spatial Attention Module (SAM)
def spatial_attention(channel_refined_feature):
    maxpool_spatial = KL.Lambda(lambda x: K.max(x, axis=3, keepdims=True))(channel_refined_feature)
    avgpool_spatial = KL.Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(channel_refined_feature)
    max_avg_pool_spatial = KL.Concatenate(axis=3)([maxpool_spatial, avgpool_spatial])
    return KL.Conv2D(filters=1, kernel_size=(3, 3), padding="same", activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(max_avg_pool_spatial)


def cbam_module(input_xs, reduction_ratio=0.5):
    channel_refined_feature = channel_attention(input_xs, reduction_ratio=reduction_ratio)
    spatial_attention_feature = spatial_attention(channel_refined_feature)
    refined_feature = KL.Multiply()([channel_refined_feature, spatial_attention_feature])
    # Alternatively, return a residual connection: KL.Add()([refined_feature, input_xs])
    return refined_feature
```
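
As a quick sanity check, here is a minimal sketch of how `cbam_module` might be dropped into a Keras model. The input shape, layer sizes, and class count are arbitrary placeholders for illustration, not part of the original snippet:

```python
from keras.models import Model

# Hypothetical toy model: a conv block refined by CBAM, followed by a classifier head.
inputs = KL.Input(shape=(64, 64, 3))                        # assumes channels_last
x = KL.Conv2D(32, (3, 3), padding="same", activation="relu")(inputs)
x = cbam_module(x, reduction_ratio=0.5)                     # CBAM-refined features, same shape as x
x = KL.GlobalAveragePooling2D()(x)
outputs = KL.Dense(10, activation="softmax")(x)

model = Model(inputs, outputs)
model.summary()
```

Because both attention maps are applied with element-wise `Multiply` layers that broadcast over the feature map, the module preserves the input shape and can be inserted after any convolutional block.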

CBAM (Convolutional Block Attention Module) is an attention module for convolutional neural networks, and it comes up in several of the cited references.

In the Ultralytics YOLO project, the CBAM attention module can be integrated into the network; doing this cleanly requires understanding YOLO's architectural design and its parameter-parsing mechanism. An integration done this way avoids common pitfalls and can effectively improve model performance, and the same approach also serves as a template for integrating other custom modules, not just CBAM [^1].

In agricultural AI, a CBAM-based model named CBAM-STN-TPS-YOLO has been proposed; through three core technical innovations it achieves a new level of accuracy in precision-agriculture detection [^2].

From an implementation standpoint, CBAM can also be written in PyTorch. Below is a simple (and, as noted in the comments, incomplete) example [^3]:

```python
import torch
import torch.nn as nn

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16):
        super(CBAM, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Linear(in_channels, in_channels // reduction_ratio)
        self.fc2 = nn.Linear(in_channels // reduction_ratio, in_channels)
        self.conv_after_concat = nn.Conv2d(in_channels * 2, in_channels, kernel_size=1, stride=1)

    def forward(self, x):
        # calculate channel attention weights from the average-pooled descriptor
        avg_out = self.avg_pool(x)
        avg_out = self.fc1(avg_out.view(avg_out.size(0), -1))
        avg_out = nn.ReLU()(avg_out)
        avg_out = self.fc2(avg_out)
        avg_out = nn.Sigmoid()(avg_out)

        # and from the max-pooled descriptor
        max_out = self.max_pool(x)
        max_out = self.fc1(max_out.view(max_out.size(0), -1))
        max_out = nn.ReLU()(max_out)
        max_out = self.fc2(max_out)
        max_out = nn.Sigmoid()(max_out)

        # The rest of the snippet is not given in full in the cited reference;
        # presumably the channel weights would be applied and spatial attention handled here.
        # ......
        return x
```

A reference on denoising algorithms also gives the channel-attention formula: \(M_C(F) = \sigma(MLP(AvgPool(F)) + MLP(MaxPool(F)))\), where \(\sigma\) denotes the sigmoid activation function [^4].
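
For reference, the companion spatial-attention formula from the original CBAM paper (Woo et al., 2018), which the Keras `spatial_attention` function above follows (with a 3×3 kernel instead of the paper's 7×7), is:

\[
M_S(F) = \sigma\left(f^{7\times 7}\big([\mathrm{AvgPool}(F);\ \mathrm{MaxPool}(F)]\big)\right)
\]

where \(f^{7\times 7}\) is a convolution with a 7×7 kernel, \([\cdot\,;\cdot]\) denotes concatenation along the channel axis, and \(\sigma\) is again the sigmoid function.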