TensorFlow基础笔记(11) max_pool2d 函数

本文通过TensorFlow实现了一个2D最大池化的示例,展示了如何定义输入、执行最大池化操作,并输出最终结果的尺寸。该示例有助于理解最大池化层的工作原理及其在卷积神经网络中的应用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 

# Reference signature of slim.max_pool2d:
# def max_pool2d(inputs,
#                kernel_size,
#                stride=2,
#                padding='VALID',
#                data_format=DATA_FORMAT_NHWC,
#                outputs_collections=None,
#                scope=None):

# NOTE(review): this snippet needs `tf` and `slim` in scope, but no import is
# visible here -- presumably `import tensorflow as tf` and
# `import tensorflow.contrib.slim as slim` (TF 1.x); confirm before running.

# Output size in "VALID" mode (no padding):
#   out_height = ceil((in_height - filter_height + 1) / stride_height)
#              = floor((in_height - filter_height) / stride_height) + 1
# and likewise for the width. (The rounding must be a ceiling, not
# round-to-nearest, and the formula holds for even kernel sizes as well.)

# A batch of one 7x7 single-channel image holding rounded random values.
# Named `input_tensor` so the builtin `input` is not shadowed.
input_tensor = tf.Variable(tf.round(10 * tf.random_normal([1, 7, 7, 1])))

# Optional comparison with a plain convolution over the same input:
# conv_filter = tf.Variable(tf.round(5 * tf.random_normal([3, 3, 1, 1])))
# op2 = tf.nn.conv2d(input_tensor, conv_filter, strides=[1, 1, 1, 1], padding='VALID')

# 3x3 max pooling with stride 1; padding is left at the 'VALID' default shown
# in the reference signature above.
slim_max_pool2d = slim.max_pool2d(input_tensor, [3, 3], [1, 1], scope='pool1')

# Optional comparison with a 'SAME'-padded slim convolution:
# slim_conv2d_SAME = slim.conv2d(input_tensor, 1, [3, 3], [1, 1], weights_initializer=tf.ones_initializer, padding='SAME')

with tf.Session() as sess:
    # The input is a tf.Variable, so it must be initialized before evaluation.
    sess.run(tf.global_variables_initializer())
    slim_max_pool2d_value = sess.run(slim_max_pool2d)
    # By the VALID formula: ceil((7 - 3 + 1) / 1) = 5, i.e. shape (1, 5, 5, 1).
    print(slim_max_pool2d_value.shape)

 

怎么实现yolov8中将cbam和Repulsion Loss损失函数两个模块融合起来,以下分别是我的CBAM模块、CBAM的yaml、Repulsion Loss模块、Repulsion Loss的yaml以及我的训练脚本。CBAM模块:import numpy as np import torch from torch import nn from torch.nn import init class ChannelAttentionModule(nn.Module): def __init__(self, c1, reduction=16): super(ChannelAttentionModule, self).__init__() mid_channel = c1 // reduction self.avg_pool = nn.AdaptiveAvgPool2d(1) self.max_pool = nn.AdaptiveMaxPool2d(1) self.shared_MLP = nn.Sequential( nn.Linear(in_features=c1, out_features=mid_channel), nn.LeakyReLU(0.1, inplace=True), nn.Linear(in_features=mid_channel, out_features=c1) ) self.act = nn.Sigmoid() #self.act=nn.SiLU() def forward(self, x): avgout = self.shared_MLP(self.avg_pool(x).view(x.size(0),-1)).unsqueeze(2).unsqueeze(3) maxout = self.shared_MLP(self.max_pool(x).view(x.size(0),-1)).unsqueeze(2).unsqueeze(3) return self.act(avgout + maxout) class SpatialAttentionModule(nn.Module): def __init__(self): super(SpatialAttentionModule, self).__init__() self.conv2d = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3) self.act = nn.Sigmoid() def forward(self, x): avgout = torch.mean(x, dim=1, keepdim=True) maxout, _ = torch.max(x, dim=1, keepdim=True) out = torch.cat([avgout, maxout], dim=1) out = self.act(self.conv2d(out)) return out class CBAM(nn.Module): def __init__(self, c1,c2): super(CBAM, self).__init__() self.channel_attention = ChannelAttentionModule(c1) self.spatial_attention = SpatialAttentionModule() def forward(self, x): out = self.channel_attention(x) * x out = self.spatial_attention(out) * out return out CBAM的yaml:# Ultralytics YOLO 🚀, AGPL-3.0 license # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect # Parameters nc: 2 # number of classes loss: 'RepulsionLoss' # 关键修改:指定使用Repulsion Loss 2025/7/19改 scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' # [depth, width, max_channels] n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs # YOLOv8.0n backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [1024, True]] - [-1, 3, CBAM, [1024]] - [-1, 1, SPPF, [1024, 5]] # 9 # YOLOv8.0n head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 12 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 15 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 13], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 10], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5) Repulsionloss模块:import torch import numpy as np def pairwise_bbox_iou(box1, box2, box_format='xywh'): if box_format == 'xyxy': lt = torch.max(box1[:, None, :2], box2[:, :2]) rb = torch.min(box1[:, None, 2:], box2[:, 2:]) area_1 = torch.prod(box1[:, 2:] - box1[:, :2], 1) area_2 = torch.prod(box2[:, 2:] - box2[:, :2], 1) elif box_format == 'xywh': lt = torch.max( (box1[:, None, :2] - box1[:, None, 2:] / 
2), (box2[:, :2] - box2[:, 2:] / 2), ) rb = torch.min( (box1[:, None, :2] + box1[:, None, 2:] / 2), (box2[:, :2] + box2[:, 2:] / 2), ) area_1 = torch.prod(box1[:, 2:], 1) area_2 = torch.prod(box2[:, 2:], 1) valid = (lt < rb).type(lt.type()).prod(dim=2) inter = torch.prod(rb - lt, 2) * valid return inter / (area_1[:, None] + area_2 - inter) def IoG(gt_box, pred_box): inter_xmin = torch.max(gt_box[:, 0], pred_box[:, 0]) inter_ymin = torch.max(gt_box[:, 1], pred_box[:, 1]) inter_xmax = torch.min(gt_box[:, 2], pred_box[:, 2]) inter_ymax = torch.min(gt_box[:, 3], pred_box[:, 3]) Iw = torch.clamp(inter_xmax - inter_xmin, min=0) Ih = torch.clamp(inter_ymax - inter_ymin, min=0) I = Iw * Ih G = ((gt_box[:, 2] - gt_box[:, 0]) * (gt_box[:, 3] - gt_box[:, 1])).clamp(1e-6) return I / G def smooth_ln(x, sigma=0.5): return torch.where( torch.le(x, sigma), -torch.log(1 - x), ((x - sigma) / (1 - sigma)) - np.log(1 - sigma) ) def repulsion_loss(pbox, gtbox, fg_mask, sigma_repgt=0.9, sigma_repbox=0, pnms=0, gtnms=0): # nms=0 loss_repgt = torch.zeros(1).to(pbox.device) loss_repbox = torch.zeros(1).to(pbox.device) bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) bs = 0 pbox = pbox.detach() gtbox = gtbox.detach() for idx in range(pbox.shape[0]): num_pos = bbox_mask[idx].sum() if num_pos <= 0: continue _pbox_pos = torch.masked_select(pbox[idx], bbox_mask[idx]).reshape([-1, 4]) _gtbox_pos = torch.masked_select(gtbox[idx], bbox_mask[idx]).reshape([-1, 4]) bs += 1 pgiou = pairwise_bbox_iou(_pbox_pos, _gtbox_pos, box_format='xyxy') ppiou = pairwise_bbox_iou(_pbox_pos, _pbox_pos, box_format='xyxy') pgiou = pgiou.cuda().data.cpu().numpy() ppiou = ppiou.cuda().data.cpu().numpy() _gtbox_pos_cpu = _gtbox_pos.cuda().data.cpu().numpy() for j in range(pgiou.shape[0]): for z in range(j, pgiou.shape[0]): ppiou[j, z] = 0 if (_gtbox_pos_cpu[j][0] == _gtbox_pos_cpu[z][0]) and (_gtbox_pos_cpu[j][1] == _gtbox_pos_cpu[z][1]) \ and (_gtbox_pos_cpu[j][2] == _gtbox_pos_cpu[z][2]) and ( _gtbox_pos_cpu[j][3] 
== _gtbox_pos_cpu[z][3]): pgiou[j, z] = 0 pgiou[z, j] = 0 ppiou[z, j] = 0 pgiou = torch.from_numpy(pgiou).to(pbox.device).cuda().detach() ppiou = torch.from_numpy(ppiou).to(pbox.device).cuda().detach() max_iou, _ = torch.max(pgiou, 1) pg_mask = torch.gt(max_iou, gtnms) num_repgt = pg_mask.sum() if num_repgt > 0: pgiou_pos = pgiou[pg_mask, :] _, argmax_iou_sec = torch.max(pgiou_pos, 1) pbox_sec = _pbox_pos[pg_mask, :] gtbox_sec = _gtbox_pos[argmax_iou_sec, :] IOG = IoG(gtbox_sec, pbox_sec) loss_repgt += smooth_ln(IOG, sigma_repgt).mean() pp_mask = torch.gt(ppiou, pnms) num_pbox = pp_mask.sum() if num_pbox > 0: loss_repbox += smooth_ln(ppiou, sigma_repbox).mean() loss_repgt /= bs loss_repbox /= bs torch.cuda.empty_cache() return loss_repgt.squeeze(0), loss_repbox.squeeze(0) Repulsionloss的yaml:# Ultralytics YOLO 🚀, AGPL-3.0 license # YOLOv8 object detection model. More improvement points for YOLOv8, please see https://github.com/iscyy/ultralyticsPro # Parameters nc: 80 # number of classes scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' # [depth, width, max_channels] n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs loss: 'RepulsionLoss' # 举例,如果使用 RepulsionLoss 损失函数的话, 即修改对应的名称 # YOLOv8.0n backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [1024, True]] - [-1, 1, SPPF, [1024, 5]] # 9 # YOLOv8.0n head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 12 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 15 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 18 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [1024]] # 21 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 训练脚本.py:import warnings import envs warnings.filterwarnings('ignore') from ultralytics import YOLO if __name__ == '__main__': model = YOLO(r'F:\Anaconda\anaconda\envs\yolov8_pytorch\yolov8_CBAM.yaml').load(r'F:\Anaconda\anaconda\envs\yolov8_pytorch\测试\yolov8n.pt') model.train( data=r'F:\Anaconda\anaconda\envs\yolov8_pytorch\xunlian2\data.yaml', device="cuda", # 
使用GPU(等效于 device=0) epochs=200, # 训练轮次 batch=16, # 根据GPU内存调整(4060笔记本GPU建议8-16) imgsz=640, # 输入图像尺寸 workers=4, # 数据加载线程数 optimizer="auto", # 自动选择优化器 lr0=0.01, # 初始学习率 name='yolov8_cbam_exp6'# 实验名称(可选) ) # 3. 验证(训练完成后自动执行) metrics = model.val() # 在验证集上评估 print(f"mAP@0.5: {metrics.box.map}") # 输出精度指标 print('模型训练完毕')
最新发布
08-05
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值