Implementing VGG with PyTorch

This post walks through implementing a VGG network with PyTorch, focusing on how a VGG block is defined and structured. By stacking 3x3 convolutions and 2x2 max-pooling layers, we build a network with 8 convolutional layers and 5 max-pooling layers that progressively shrinks the spatial size of the input while increasing the number of channels.
import numpy as np
import torch
from torch import nn

Define the VGG block

VGG uses almost exclusively 3x3 convolution kernels together with 2x2 pooling layers. Stacking several layers of small kernels gives the same receptive field as a single layer with a large kernel, while using fewer parameters and yielding a deeper structure.
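For example, two stacked 3x3 convolutions cover the same 5x5 receptive field as a single 5x5 convolution, but with fewer parameters. A rough comparison (a minimal sketch; the channel count of 64 is an arbitrary choice for illustration, and nn is the module imported above):

c = 64  # example channel count, chosen only for illustration
stacked_3x3 = nn.Sequential(
    nn.Conv2d(c, c, kernel_size=3, padding=1),
    nn.Conv2d(c, c, kernel_size=3, padding=1),
)
single_5x5 = nn.Conv2d(c, c, kernel_size=5, padding=2)

# Count learnable parameters (weights and biases) in each variant
count_params = lambda m: sum(p.numel() for p in m.parameters())
print(count_params(stacked_3x3), count_params(single_5x5))  # 73856 vs 102464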

def vgg_block(num_convs, in_channels, out_channels):
    # Define the first layer and wrap it in a list
    net = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(True)]
    
    # Define the remaining layers in a loop
    for i in range(num_convs - 1):
        # append adds one element at a time;
        # both input and output channels are out_channels
        net.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        net.append(nn.ReLU(True))
        
    # Define the pooling layer
    net.append(nn.MaxPool2d(2, 2))
    # The '*' unpacks the list into separate arguments
    return nn.Sequential(*net)

Print the block structure:

# 3 convolutional layers, 64 input channels, 128 output channels
block_demo = vgg_block(3, 64, 128)
print(block_demo)

Output:

Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): ReLU(inplace)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

To verify the input and output sizes, define an input of shape (1, 64, 300, 300):

input_demo = torch.zeros(1, 64, 300, 300)
output_demo = block_demo(input_demo)
print(output_demo.shape)

Output:

torch.Size([1, 128, 150, 150])

After one VGG block, the spatial size of the input is halved and the number of channels becomes 128.

Define a helper function that stacks VGG blocks in a loop:

def vgg_stack(num_convs, channels):
    net = []
    for n, c in zip(num_convs, channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n, in_c, out_c))
    return nn.Sequential(*net)
        

Using this helper, define a simple VGG structure with 8 convolutional layers:

vgg_net = vgg_stack((1, 1, 2, 2, 2),((3, 64),(64, 128),(128, 256),(256, 512),(512, 512)))
print(vgg_net)

Output:

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (2): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (3): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (4): Sequential(
    (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)

With 5 max-pooling layers, the spatial size of the image is reduced by a factor of 32 (2^5).
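As a quick sanity check (a minimal sketch; the 32x32 input size is an assumption, chosen to match CIFAR-10-style images so that the final feature map is 1x1):

# Count the layer types in the stacked network
num_conv_layers = sum(isinstance(m, nn.Conv2d) for m in vgg_net.modules())
num_pool_layers = sum(isinstance(m, nn.MaxPool2d) for m in vgg_net.modules())
print(num_conv_layers, num_pool_layers)  # 8 5

# A 32x32 input shrinks to 1x1 after five 2x2 poolings (32 / 2**5 = 1)
check_input = torch.zeros(1, 3, 32, 32)
print(vgg_net(check_input).shape)  # torch.Size([1, 512, 1, 1])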

Add the fully connected layers:

class vgg(nn.Module):
    def __init__(self):
        super(vgg, self).__init__()
        self.feature = vgg_net
        self.fc = nn.Sequential(
            nn.Linear(512, 100),
            nn.ReLU(True),
            nn.Linear(100, 10)
        )
    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten the feature map before the fully connected layers
        x = self.fc(x)
        return x
VGG_Net = vgg()
print(VGG_Net)

Output:

vgg(
  (feature): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): Sequential(
      (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (4): Sequential(
      (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=100, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
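A minimal forward-pass check (a sketch, assuming 32x32 RGB inputs such as CIFAR-10, so that the flattened feature size matches the Linear(512, 100) layer):

dummy_input = torch.zeros(2, 3, 32, 32)
print(VGG_Net(dummy_input).shape)  # torch.Size([2, 10])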
### Implementing a VGG Model with PyTorch

The following is example code for a VGG implementation based on PyTorch, covering data loading, model construction, and the training loop.

#### Data loading

Use the `torch.utils.data.DataLoader` class to create two data loaders, one for the training set (`train_dl`) and one for the test set (`test_dl`):

```python
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define the preprocessing pipeline
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize images to (224, 224)
    transforms.ToTensor(),          # convert images to tensors scaled to [0, 1]
])

# Load the CIFAR-10 training and test sets as an example
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Create the DataLoaders
batch_size = 32
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
```

#### Building the network

Use the predefined model `torchvision.models.vgg16` to build a VGG-16 quickly, then fine-tune it for the target task:

```python
import torch.nn as nn
from torchvision.models import vgg16

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

model = vgg16(pretrained=True).to(device)  # load pretrained weights and move to the device

# Freeze the convolutional feature extractor
for param in model.features.parameters():
    param.requires_grad = False

# Replace the last fully connected layer to match the new number of classes
num_classes = 10  # assume 10 target classes
in_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features, num_classes).to(device)
```

#### Loss function and optimizer

For this supervised learning task, use cross-entropy loss and stochastic gradient descent:

```python
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
```

#### Training loop

Write a simple training loop to update the model parameters:

```python
def train_model(dataloader, model, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Zero the gradient buffers
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            # Accumulate statistics for this batch
            _, predicted = torch.max(outputs, dim=1)
            running_loss += loss.item() * images.size(0)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        avg_loss = running_loss / total_samples
        accuracy = correct_predictions / total_samples
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy*100:.2f}%')

# Start training
train_model(train_dl, model, criterion, optimizer, epochs=5)
```

This is a complete walkthrough of how to implement and apply a VGG model with PyTorch [^1][^2][^3].
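The `test_dl` loader defined above is not used during training; a minimal evaluation sketch (assuming the `model`, `criterion`, `device`, and `test_dl` defined above) could look like this:

```python
def evaluate_model(dataloader, model, criterion):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_loss += criterion(outputs, labels).item() * images.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    print(f'Test Loss: {total_loss / total:.4f}, Test Accuracy: {correct / total * 100:.2f}%')

evaluate_model(test_dl, model, criterion)
```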