1. Background
While reproducing a UNet-based model, I needed building blocks whose forward pass returns two tensors or accepts two tensors. A down-sampling block must return both the convolution output c (same spatial size as its input) and the pooled output p (half the spatial size), and an up-sampling block must take both the previous decoder layer's output x and the matching encoder output s as a skip connection. The stock nn containers such as nn.Sequential cannot express multiple inputs or outputs, so I wrote custom forward functions. My first attempt looked like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
# Define the UNet model class
class UNetElephant(nn.Module):
    def __init__(self, nb_filters_start=16, complexity=2):
        super(UNetElephant, self).__init__()
        # Down-sampling blocks (encoder)
        self.down_block_x1 = self.down_block(nb_filters_start, complexity)       # 1st down block, 16 filters
        self.down_block_x2 = self.down_block(nb_filters_start * 2, complexity)   # 2nd down block, 32 filters
        self.down_block_x3 = self.down_block(nb_filters_start * 4, complexity)   # 3rd down block, 64 filters
        self.down_block_x4 = self.down_block(nb_filters_start * 8, complexity)   # 4th down block, 128 filters
        self.down_block_x5 = self.down_block(nb_filters_start * 16, complexity)  # 5th down block, 256 filters
        # Bottleneck block
        self.bottom_transition_x = self.bottom_transition(nb_filters_start * 16, nb_filters_start * 32, complexity)  # bottleneck, 512 filters
        # Up-sampling blocks (decoder)
        self.up_block_x1 = self.up_block(nb_filters_start * 32 + nb_filters_start * 16, nb_filters_start * 16, complexity)  # 1st up block, 256 filters
        self.up_block_x2 = self.up_block(nb_filters_start * 16 + nb_filters_start * 8, nb_filters_start * 8, complexity)    # 2nd up block, 128 filters
        self.up_block_x3 = self.up_block(nb_filters_start * 8 + nb_filters_start * 4, nb_filters_start * 4, complexity)     # 3rd up block, 64 filters
        self.up_block_x4 = self.up_block(nb_filters_start * 4 + nb_filters_start * 2, nb_filters_start * 2, complexity)     # 4th up block, 32 filters
        self.up_block_x5 = self.up_block(nb_filters_start * 2 + nb_filters_start, nb_filters_start, complexity)             # 5th up block, 16 filters
        # Output layer
        self.final_conv = nn.Conv2d(nb_filters_start, 1, kernel_size=1)  # single output channel (assuming a 1-channel mask)
    def down_block(self, out_channels, num_conv_layers):
        """
        Down-sampling block: several conv layers with ReLU and batch norm, followed by max pooling.
        Returns two values: the conv output c and the pooled output p.
        """
        layers = []
        in_channels = 3 if not hasattr(self, 'down_block_x1') else out_channels // 2  # the first block takes the 3-channel image
        # Stack of conv layers
        for i in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))  # ReLU activation
            if (i + 1) % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # batch normalization
            in_channels = out_channels  # update the input channel count
        # Combine the conv layers
        conv_block = nn.Sequential(*layers)
        # Max pooling and batch normalization
        max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        batch_norm = nn.BatchNorm2d(out_channels)
        # Return both the conv feature map and the pooled feature map
        def forward(x):
            c = conv_block(x)
            p = max_pool(c)
            p = batch_norm(p)
            return c, p
        return forward
    def up_block(self, in_channels, out_channels, num_conv_layers):
        """
        Up-sampling block: bilinear upsampling, concatenation with the skip connection, then conv layers with ReLU and batch norm.
        """
        layers = []
        for i in range(num_conv_layers):
            if i == 0:
                layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            else:
                layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            if i > 2 and i % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # the conv above outputs out_channels
        upsample = nn.Upsample(scale_factor=2, align_corners=True, mode='bilinear')
        conv_block = nn.Sequential(*layers)
        def forward(x, s):
            u = upsample(x)
            cc = torch.cat([u, s], dim=1)
            out = conv_block(cc)
            return out
        return forward
    def bottom_transition(self, in_channels, out_channels, num_conv_layers):
        """
        Bottleneck block: several conv layers with ReLU and batch norm; no pooling, returns a single feature map.
        """
        layers = []
        # Stack of conv layers
        for i in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))  # ReLU activation
            if (i + 1) % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # batch normalization
            in_channels = out_channels  # update the input channel count
        # Combine the conv layers
        conv_block = nn.Sequential(*layers)
        return conv_block
    def forward(self, x):
        # Encoder (down-sampling) path
        c1, p1 = self.down_block_x1(x)
        c2, p2 = self.down_block_x2(p1)
        c3, p3 = self.down_block_x3(p2)
        c4, p4 = self.down_block_x4(p3)
        c5, p5 = self.down_block_x5(p4)
        bt = self.bottom_transition_x(p5)
        # Decoder (up-sampling) path
        u1 = self.up_block_x1(bt, c5)
        u2 = self.up_block_x2(u1, c4)
        u3 = self.up_block_x3(u2, c3)
        u4 = self.up_block_x4(u3, c2)
        u5 = self.up_block_x5(u4, c1)
        # Output layer
        output = self.final_conv(u5)
        # Sigmoid activation on the output
        output = torch.sigmoid(output)
        return output
if __name__ == '__main__':
    # Check whether CUDA is available and pick the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    # Instantiate the model
    model = UNetElephant()
    # Move the model to the chosen device
    model.to(device)
    print(f"Model is on device: {next(model.parameters()).device}")
    # for name, param in model.named_parameters():
    #     print(f"{name}: {param.device}")
    # Create a random input tensor
    inp = torch.randn(1, 3, 256, 256)
    # Move the input tensor to the chosen device
    inp = inp.to(device)
    print(f"Input is on device: {inp.device}")
    # Run a forward pass and print the output shape
    try:
        output = model(inp)
        print("Output shape:", output.shape)
    except RuntimeError as e:
        print("RuntimeError:", e)
When this model is moved to CUDA and run, the forward pass fails with:
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
2. Solution
model.to('cuda') only moves parameters and buffers that are registered on nn.Module objects: submodules assigned as attributes, nn.Parameter attributes, and registered buffers. In the code above, the conv, pooling, and batch-norm layers are created inside down_block/up_block and captured only by Python closures, so PyTorch never registers them; they stay on the CPU while the input tensor is on the GPU, which produces the error above. The fix is to implement the down-sampling, up-sampling, and bottleneck blocks as nn.Module subclasses with their own forward methods, which can freely accept or return multiple tensors.
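As a minimal sketch of the root cause (BrokenBlock and FixedBlock below are illustrative names, not part of the original model): a layer captured only in a closure never appears in model.parameters(), so .to(device) cannot move it, whereas the same layer assigned to self is registered and follows the module to the device.

import torch
import torch.nn as nn

class BrokenBlock(nn.Module):
    def __init__(self):
        super().__init__()
        conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)  # local variable, never assigned to self
        def fwd(x):
            return conv(x)  # the closure keeps conv alive, but PyTorch never sees it
        self.block = fwd    # a plain function attribute, not a registered submodule

class FixedBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)  # assigned to self -> registered
    def forward(self, x):
        return self.conv(x)

print(len(list(BrokenBlock().parameters())))  # 0 -> model.to(device) has nothing to move
print(len(list(FixedBlock().parameters())))   # 2 -> weight and bias follow model.to(device)

Applying this idea to every block gives the rewritten model: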
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
class DownBlock(nn.Module):
    def __init__(self, in_channels, out_channels, num_conv_layers):
        super().__init__()
        self.conv_block_x = self.conv_block(in_channels, out_channels, num_conv_layers)
        self.down_sample_x = self.down_block(out_channels)
    def forward(self, x):
        c = self.conv_block_x(x)
        p = self.down_sample_x(c)
        return c, p
    def conv_block(self, in_channels, out_channels, num_conv_layers):
        """
        Conv stack of the down-sampling block: several conv layers with ReLU and batch norm.
        """
        layers = []
        # Stack of conv layers
        for i in range(num_conv_layers):
            if i == 0:
                layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            else:
                layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))  # ReLU activation
            if (i + 1) % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # batch normalization
            in_channels = out_channels  # update the input channel count
        # Combine the conv layers
        conv_block = nn.Sequential(*layers)
        return conv_block
    def down_block(self, out_channels):
        max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        batch_norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(max_pool, batch_norm)
class UpBlock(nn.Module):
    def __init__(self, in_channels, out_channels, num_conv_layers):
        super().__init__()
        self.up_sample_x = nn.Upsample(scale_factor=2, align_corners=True, mode='bilinear')
        self.conv_block_x = self.conv_block(in_channels, out_channels, num_conv_layers)
    def forward(self, x, s):
        u = self.up_sample_x(x)
        cc = torch.cat([u, s], dim=1)
        out = self.conv_block_x(cc)
        return out
    def conv_block(self, in_channels, out_channels, num_conv_layers):
        layers = []
        for i in range(num_conv_layers):
            if i == 0:
                layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            else:
                layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            if i > 2 and i % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # the conv above outputs out_channels
        conv_block = nn.Sequential(*layers)
        return conv_block
class BottomTransition(nn.Module):
    def __init__(self, in_channels, out_channels, num_conv_layers):
        super().__init__()
        self.conv_block_x = self.conv_block(in_channels, out_channels, num_conv_layers)
    def forward(self, x):
        return self.conv_block_x(x)
    def conv_block(self, in_channels, out_channels, num_conv_layers):
        layers = []
        for i in range(num_conv_layers):
            if i == 0:
                layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            else:
                layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            if i > 2 and i % 2 == 0:
                layers.append(nn.BatchNorm2d(out_channels))  # the conv above outputs out_channels
        conv_block = nn.Sequential(*layers)
        return conv_block
# Define the UNet model class
class UNetElephant(nn.Module):
    def __init__(self, nb_filters_start=16, complexity=2):
        super(UNetElephant, self).__init__()
        # Down-sampling blocks (encoder)
        self.down_block_x1 = DownBlock(3, nb_filters_start, complexity)                         # 1st down block
        self.down_block_x2 = DownBlock(nb_filters_start, nb_filters_start * 2, complexity)      # 2nd down block
        self.down_block_x3 = DownBlock(nb_filters_start * 2, nb_filters_start * 4, complexity)  # 3rd down block
        self.down_block_x4 = DownBlock(nb_filters_start * 4, nb_filters_start * 8, complexity)  # 4th down block
        self.down_block_x5 = DownBlock(nb_filters_start * 8, nb_filters_start * 16, complexity) # 5th down block
        # Bottleneck block
        self.bottom_transition_x = BottomTransition(nb_filters_start * 16, nb_filters_start * 32, complexity)
        # Up-sampling blocks (decoder)
        self.up_block_x1 = UpBlock(nb_filters_start * 32 + nb_filters_start * 16, nb_filters_start * 16, complexity)
        self.up_block_x2 = UpBlock(nb_filters_start * 16 + nb_filters_start * 8, nb_filters_start * 8, complexity)
        self.up_block_x3 = UpBlock(nb_filters_start * 8 + nb_filters_start * 4, nb_filters_start * 4, complexity)
        self.up_block_x4 = UpBlock(nb_filters_start * 4 + nb_filters_start * 2, nb_filters_start * 2, complexity)
        self.up_block_x5 = UpBlock(nb_filters_start * 2 + nb_filters_start, nb_filters_start, complexity)
        # Output layer
        self.final_conv = nn.Sequential(nn.Conv2d(nb_filters_start, 1, kernel_size=1), nn.Sigmoid())  # single output channel (assuming a 1-channel mask)
    def forward(self, x):
        # Encoder (down-sampling) path
        c1, p1 = self.down_block_x1(x)
        c2, p2 = self.down_block_x2(p1)
        c3, p3 = self.down_block_x3(p2)
        c4, p4 = self.down_block_x4(p3)
        c5, p5 = self.down_block_x5(p4)
        bt = self.bottom_transition_x(p5)
        # Decoder (up-sampling) path
        u1 = self.up_block_x1(bt, c5)
        u2 = self.up_block_x2(u1, c4)
        u3 = self.up_block_x3(u2, c3)
        u4 = self.up_block_x4(u3, c2)
        u5 = self.up_block_x5(u4, c1)
        # Output layer (sigmoid is included in final_conv)
        output = self.final_conv(u5)
        return output
if __name__ == '__main__':
    # Check whether CUDA is available and pick the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    # Instantiate the model
    model = UNetElephant()
    # Move the model to the chosen device
    model.to(device)
    print(f"Model is on device: {next(model.parameters()).device}")
    # for name, param in model.named_parameters():
    #     print(f"{name}: {param.device}")
    # Create a random input tensor
    inp = torch.randn(1, 3, 256, 256)
    # Move the input tensor to the chosen device
    inp = inp.to(device)
    print(f"Input is on device: {inp.device}")
    # Run a forward pass and print the output shape
    try:
        output = model(inp)
        print("Output shape:", output.shape)
    except RuntimeError as e:
        print("RuntimeError:", e)
