### 使用UNet模型进行图像分割实验
#### 准备工作
为了成功使用UNet模型进行图像分割,准备阶段至关重要。这包括获取合适的数据集并对其进行预处理。对于语义分割任务而言,存在多种可用的数据集[^1]。
#### 数据集划分与加载
数据应当被合理地划分为训练集、验证集和测试集,以评估模型性能的有效性和泛化能力。一种常见的做法是以7:1:2的比例来分配这些子集[^3]。通过PyTorch框架中的`Dataset`类可以方便地创建自定义数据集,并利用`DataLoader`来进行批量读取操作[^2]。
```python
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import os
class CustomImageSegmentationDataset(Dataset):
    """Pair RGB images with single-channel segmentation masks.

    Expects ``<name>.jpg`` files under ``image_dir`` and matching
    ``<name>.png`` masks under ``mask_dir``.

    Args:
        image_dir: Directory containing the input ``.jpg`` images.
        mask_dir: Directory containing the corresponding ``.png`` masks.
        transform: Optional callable applied to both image and mask.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # Sort the listing: os.listdir order is arbitrary, so without sorting
        # the index -> sample mapping is non-deterministic across runs/platforms.
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx].replace(".jpg", ".png"))
        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')  # single-channel label map
        if self.transform is not None:
            image = self.transform(image)
            # NOTE(review): the same transform is applied to the mask; a bilinear
            # Resize will blur label values — confirm whether nearest-neighbour
            # resizing is required for your mask encoding.
            mask = self.transform(mask)
        return image, mask
# Deterministic preprocessing shared by every split: fixed-size resize
# followed by conversion to a tensor.
transformations = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
])

batch_size = 8

# One dataset per split; all three use the same transform pipeline.
train_dataset = CustomImageSegmentationDataset(
    'path/to/train/images', 'path/to/train/masks', transformations)
val_dataset = CustomImageSegmentationDataset(
    'path/to/val/images', 'path/to/val/masks', transformations)
test_dataset = CustomImageSegmentationDataset(
    'path/to/test/images', 'path/to/test/masks', transformations)

# Only the training split is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
```
#### 构建UNet网络结构
接下来就是设计具体的神经网络架构,在这里选择了经典的UNet作为基础模型。该模型由编码器路径(下采样过程)、解码器路径(上采样恢复空间分辨率的过程),以及跳跃连接组成,使得低层特征能够传递给高层用于更精确的位置预测。
```python
import torch.nn as nn
import torch
def double_conv(in_channels, out_channels):
    """Two 3x3 convolutions (each followed by ReLU) that preserve spatial size.

    ``padding=1`` is required: without it each conv shrinks H and W by 2,
    so the decoder's ``torch.cat`` skip-connections receive mismatched
    spatial sizes after max-pooling/upsampling and the forward pass fails.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    )
class UNet(nn.Module):
    """Classic UNet: 4-level encoder, bilinear-upsampling decoder with skips.

    Args:
        n_class: Number of output channels (classes) of the final 1x1 conv.
    """

    def __init__(self, n_class):
        super().__init__()
        # Encoder (contracting path).
        self.dconv_down1 = double_conv(3, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)
        # Shared pooling / upsampling modules (stateless, reused at every level).
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        # Decoder (expanding path): input channels = upsampled + skip channels.
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)
        # 1x1 conv maps 64 features to per-class logits.
        self.conv_last = nn.Conv2d(64, n_class, kernel_size=1)

    def forward(self, x):
        # Contracting path; keep each level's output for the skip connections.
        skip1 = self.dconv_down1(x)
        skip2 = self.dconv_down2(self.maxpool(skip1))
        skip3 = self.dconv_down3(self.maxpool(skip2))
        bottom = self.dconv_down4(self.maxpool(skip3))

        # Expanding path: upsample, concatenate the matching skip, convolve.
        up = self.dconv_up3(torch.cat([self.upsample(bottom), skip3], dim=1))
        up = self.dconv_up2(torch.cat([self.upsample(up), skip2], dim=1))
        up = self.dconv_up1(torch.cat([self.upsample(up), skip1], dim=1))
        return self.conv_last(up)
```
#### 训练流程设置
完成上述准备工作之后,则需配置好损失函数、优化算法以及其他辅助指标计算方法等要素,从而启动正式的训练循环。
```python
# Training hyper-parameters (previously referenced but never defined -> NameError).
num_epochs = 20
log_interval = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet(n_class=1).to(device)
# BCEWithLogitsLoss fuses sigmoid + BCE, so the model emits raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (images, masks) in enumerate(train_loader):
        images = images.to(device)
        # ToTensor already returns masks as (1, H, W), so batches arrive as
        # (B, 1, H, W); the previous .unsqueeze(dim=1) produced (B, 1, 1, H, W)
        # and broke BCEWithLogitsLoss's shape check against (B, 1, H, W) logits.
        masks = masks.float().to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the running average every `log_interval` mini-batches.
        if i % log_interval == log_interval - 1:
            print(f'Epoch [{epoch+1}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss/log_interval:.4f}')
            running_loss = 0.0

    # Per-epoch validation pass; no gradients needed.
    with torch.no_grad():
        model.eval()
        val_running_loss = 0.0
        for j, (val_images, val_masks) in enumerate(val_loader):
            val_images = val_images.to(device)
            val_masks = val_masks.float().to(device)  # already (B, 1, H, W)
            val_outputs = model(val_images)
            val_loss = criterion(val_outputs, val_masks)
            val_running_loss += val_loss.item()
        avg_val_loss = val_running_loss / len(val_loader)
        print(f'\nValidation Epoch [{epoch+1}], Average Validation Loss: {avg_val_loss:.4f}\n')
```
#### 测试与评价
当训练完成后,还需要基于独立于训练样本之外的新颖实例——即测试集中所含有的图片及其对应的真值掩膜图,进一步检验已学得参数下的表现情况。通常会采用Dice系数或者IoU交并比这样的度量标准来量化二元分类效果的好坏程度。
```python
# Evaluate on the held-out test split with the Dice coefficient.
dice_score = []
with torch.no_grad():
    model.eval()
    for test_images, test_masks in test_loader:
        test_images = test_images.to(device)
        # Masks already arrive as (B, 1, H, W) from ToTensor; the previous
        # .unsqueeze(dim=1) added a spurious dimension and misaligned the
        # element-wise products below.
        test_masks = test_masks.float().to(device)
        pred_masks = model(test_images)
        # Logits -> probabilities -> hard 0/1 predictions at threshold 0.5.
        sigmoid_pred_masks = torch.sigmoid(pred_masks)
        binary_pred_masks = (sigmoid_pred_masks > 0.5).float()
        intersection = (binary_pred_masks * test_masks).sum()
        union = binary_pred_masks.sum() + test_masks.sum()
        # Dice = 2|A∩B| / (|A| + |B|); epsilon guards the empty-mask case.
        dice = (2. * intersection) / (union + 1e-8)
        dice_score.append(dice.cpu())
# NOTE(review): this is a mean of per-batch Dice values, which weights
# batches equally regardless of mask size — acceptable for a quick report.
print(f'Test Dice Score: {torch.tensor(dice_score).mean():.4f}')
```