U-net

该代码实现了一个基于PyTorch的U-net模型,用于图像分割任务。模型包括InConv(输入卷积)、Down(下采样层,含最大池化和双卷积)、Up(上采样层,含转置卷积或双线性插值)和OutConv(输出层)。通过双卷积层进行特征提取,结合上采样和下采样的结构,以处理不同尺度的信息。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init


class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> BatchNorm -> ReLU) stages.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the second stage.
        mid_channels: Channels between the two stages. A falsy value
            (``None`` or ``0``) means "use ``out_channels``".
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels or out_channels

        # Layers are created in conv/bn/relu order for each stage, so the
        # Sequential indices are 0..5: conv, bn, relu, conv, bn, relu.
        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both conv stages and return the result."""
        out = self.double_conv(x)
        return out


class Down(nn.Module):
    """Encoder stage: 2x2 max pooling followed by a DoubleConv.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the DoubleConv.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # MaxPool2d(2) defaults its stride to the kernel size, so H and W are
        # halved (B*C*H*W => B*C*H/2*W/2) while the channel count is untouched.
        pool = nn.MaxPool2d(kernel_size=2)
        convs = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, convs)

    def forward(self, x):
        """Pool ``x`` and then apply the double convolution."""
        reduced = self.maxpool_conv(x)
        return reduced

class up(nn.Module):
    """Decoder stage: upsample, pad to the skip tensor, concatenate, double conv.

    The upsampling branch maps ``in_ch`` channels to ``in_ch // 2``. After
    concatenation with the skip tensor the result is fed to
    ``DoubleConv(in_ch, out_ch)``, so the skip tensor has to supply the
    remaining ``in_ch - in_ch // 2`` channels.

    Args:
        in_ch: Channels of the tensor coming from the deeper level.
        out_ch: Channels produced by this stage.
        Transpose: If true, upsample with a stride-2 transposed convolution;
            otherwise use bilinear interpolation followed by a 1x1 conv + ReLU.
    """

    def __init__(self, in_ch, out_ch, Transpose=False):
        super(up, self).__init__()

        # A learned upsampling (transposed conv) is optional because it costs
        # extra weights; the default path interpolates instead.
        if Transpose:
            # Transposed conv with kernel 2 and stride 2.
            self.up = nn.ConvTranspose2d(in_ch, in_ch // 2, 2, stride=2)
        else:
            self.up = nn.Sequential(
                # Bilinear upsampling by a factor of 2.
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                # 1x1 conv that halves the channel count.
                nn.Conv2d(in_ch, in_ch // 2, kernel_size=1, padding=0),
                nn.ReLU(inplace=True),
            )
        self.conv = DoubleConv(in_ch, out_ch)
        # Initialise the weights of the upsampling branch only.
        self.up.apply(self.init_weights)

    def forward(self, x1, x2):
        """Upsample ``x1``, align it with skip tensor ``x2`` and fuse them.

        Args:
            x1: Tensor from the deeper level (upsampled here).
            x2: Skip-connection tensor from the encoder.

        Returns:
            Output of the double convolution applied to ``cat([x2, x1], dim=1)``.
        """
        x1 = self.up(x1)

        # Height / width gap between the skip tensor and the upsampled tensor.
        diffY = x2.size(2) - x1.size(2)
        diffX = x2.size(3) - x1.size(3)

        # Zero-pad x1 (left, right, top, bottom) so both tensors line up; any
        # odd remainder goes to the right / bottom side.
        x1 = nn.functional.pad(x1, (diffX // 2, diffX - diffX // 2,
                                    diffY // 2, diffY - diffY // 2))
        x = torch.cat([x2, x1], dim=1)   # skip connection
        x = self.conv(x)
        return x

    @staticmethod
    def init_weights(m):
        """Xavier-initialise a conv layer's weight and zero its bias.

        Handles both ``nn.Conv2d`` and ``nn.ConvTranspose2d`` so the
        ``Transpose=True`` branch is initialised as well; other module types
        are left untouched. Layers created with ``bias=False`` are supported.
        """
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            init.xavier_normal_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
          

class OutConv(nn.Module):
    """Final output layer of the U-net.

    Maps the decoder feature map to ``out_channels`` with a single 3x3
    convolution (padding 1), so height and width are preserved.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            padding=1,
        )

    def forward(self, x):
        """Project ``x`` to the output channels."""
        projected = self.conv(x)
        return projected

class InConv(nn.Module):
    """Input layer of the U-net: one 3x3 convolution (padding 1).

    Height and width are preserved; only the channel count changes from
    ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            padding=1,
        )

    def forward(self, x):
        """Apply the input convolution to ``x``."""
        features = self.conv(x)
        return features
  

class Unet(nn.Module):
    """U-net with four down stages, four up stages and a sigmoid output.

    Besides the layers, the constructor also creates a ``BCELoss`` instance,
    an Adam optimizer over the model's parameters (lr=1e-4), a ``device``
    attribute and a few zero-initialised bookkeeping counters.

    Args:
        in_ch: Number of channels of the input image.
        out_ch: Number of output channels (default 1).
    """

    def __init__(self, in_ch, out_ch=1):
        super().__init__()

        # Bookkeeping fields; they are only initialised here.
        # NOTE(review): presumably updated by training code outside this class.
        self.loss_stack = 0
        self.matrix_iou_stack = 0
        self.stack_count = 0
        self.display_names = ['loss_stack', 'matrix_iou_stack']

        self.bce_loss = nn.BCELoss()
        use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda' if use_cuda else 'cpu')

        # Encoder: channel widths double at every down stage.
        self.inc = InConv(in_ch, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.drop3 = nn.Dropout2d(0.5)
        self.down4 = Down(512, 1024)
        self.drop4 = nn.Dropout2d(0.5)

        # Decoder: every stage uses the bilinear (non-transposed) upsampling.
        self.up1 = up(1024, 512, False)
        self.up2 = up(512, 256, False)
        self.up3 = up(256, 128, False)
        self.up4 = up(128, 64, False)
        self.outc = OutConv(64, out_ch)

        # Created last so that it sees every parameter registered above.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)

    def forward(self, x):
        """Run the network and return the sigmoid of the final feature map.

        The result is also stored on the instance as ``self.pred_y``.
        """
        # Contracting path; dropout follows the two deepest stages.
        enc1 = self.inc(x)
        enc2 = self.down1(enc1)
        enc3 = self.down2(enc2)
        enc4 = self.drop3(self.down3(enc3))
        bottom = self.drop4(self.down4(enc4))

        # Expanding path; each stage fuses the matching encoder output.
        dec = self.up1(bottom, enc4)
        dec = self.up2(dec, enc3)
        dec = self.up3(dec, enc2)
        dec = self.up4(dec, enc1)

        logits = self.outc(dec)
        self.pred_y = torch.sigmoid(logits)
        return self.pred_y

由于网上很多的U-net不能直接运行,于是自己经过比对多个版本之后,写出了一个可以直接运行的U-net,存这儿备用。

### U-Net概述

U-Net是一种基于卷积神经网络(CNN)的深度学习架构,最初由Olaf Ronneberger等人提出用于生物医学图像分割任务[^1]。该模型因其独特的结构设计而得名——形似字母“U”。它通过编码器和解码器两个主要部分实现了高分辨率特征图到低维表示再还原的过程。

#### 架构特点

U-Net的核心在于其对称式的下采样与上采样的结合方式。具体来说:

- **编码路径**:这一阶段负责提取输入数据中的高层次语义信息。通常采用多个连续的卷积层以及最大池化操作来逐步缩小空间尺寸并增加通道数。
- **解码路径**:此过程旨在恢复细节信息并将先前学到的空间上下文重新映射回原始大小。每一步都涉及转置卷积或者插值放大技术,并伴随跳跃连接将来自相应层次上的浅层特征融合进来[^2]。

#### 实现要点

构建一个标准版本的U-Net可以遵循如下模式:

```python
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Concatenate


def unet_model(input_size=(572, 572, 1)):
    inputs = tf.keras.Input(shape=input_size)

    # Encoder Path
    conv1 = Conv2D(64, (3, 3), activation='relu', padding='valid')(inputs)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    ...  # More layers following similar pattern

    up8 = UpSampling2D(size=(2, 2))(...)
    merge8 = Concatenate(axis=3)([up8, conv2])

    out_layer = Conv2D(...)(...)

    model = tf.keras.Model(inputs=[inputs], outputs=out_layer)

    return model
```

上述代码片段展示了如何定义基本组件并通过逐级堆叠形成完整的网络框架[^3]。

#### 应用场景

由于具备强大的像素级预测能力,U-net被广泛应用于医疗影像分析领域比如肿瘤检测、细胞计数等;同时也扩展到了卫星遥感图片处理、自动驾驶车辆环境感知等多个方向[^4]。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值