import torch
from torch import nn
Defining the VGG block
VGG uses almost exclusively 3x3 convolution kernels and 2x2 pooling layers. Stacking several small kernels covers the same receptive field as a single large kernel (two 3x3 layers see a 5x5 region), while using fewer parameters and allowing a deeper network.
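As a quick check (a minimal sketch added here for illustration, with an assumed channel count of 64), we can compare the parameter count of two stacked 3x3 convolutions against a single 5x5 convolution with the same receptive field:
c = 64  # assumed channel count, for illustration only
# two stacked 3x3 convolutions: receptive field 5x5
stacked = nn.Sequential(
    nn.Conv2d(c, c, kernel_size=3, padding=1),
    nn.Conv2d(c, c, kernel_size=3, padding=1),
)
# a single 5x5 convolution: same receptive field
single = nn.Conv2d(c, c, kernel_size=5, padding=2)
print(sum(p.numel() for p in stacked.parameters()))  # 73856
print(sum(p.numel() for p in single.parameters()))   # 102464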
def vgg_block(num_convs, in_channels, out_channels):
    # define the first layer and store it in a list
    net = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(True)]
    # define the remaining convolution layers in a loop
    for i in range(num_convs - 1):
        # both input and output channels are out_channels
        net.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        net.append(nn.ReLU(True))
    # define the pooling layer
    net.append(nn.MaxPool2d(2, 2))
    # the '*' unpacks the list into separate arguments
    return nn.Sequential(*net)
Print the model structure:
# 3 convolution layers, 64 input channels, 128 output channels
block_demo = vgg_block(3, 64, 128)
print(block_demo)
Output:
Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): ReLU(inplace)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Verify the input and output sizes with an input of shape (1, 64, 300, 300):
input_demo = torch.zeros(1, 64, 300, 300)
output_demo = block_demo(input_demo)
print(output_demo.shape)
Output:
torch.Size([1, 128, 150, 150])
After one VGG block, the spatial size is halved and the channel count becomes 128.
Define a function that stacks VGG blocks in a loop:
def vgg_stack(num_convs, channels):
    net = []
    # num_convs gives the number of convolutions per block,
    # channels gives the (in, out) channel pair per block
    for n, c in zip(num_convs, channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n, in_c, out_c))
    return nn.Sequential(*net)
Using this function, define a simple VGG structure with 8 convolution layers:
vgg_net = vgg_stack((1, 1, 2, 2, 2), ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))
print(vgg_net)
Output:
Sequential(
(0): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(1): Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(2): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(3): Sequential(
(0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(4): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
)
With 5 max-pooling layers, the spatial size of the input is reduced by a factor of 2^5 = 32.
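As a quick sanity check (a sketch added for illustration, using an assumed 256x256 dummy input), we can confirm the 32x reduction:
x = torch.zeros(1, 3, 256, 256)
print(vgg_net(x).shape)  # torch.Size([1, 512, 8, 8]), since 256 / 2**5 = 8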
Add the fully connected layers:
class vgg(nn.Module):
    def __init__(self):
        super(vgg, self).__init__()
        self.feature = vgg_net
        # the 512 input features assume a 32x32 input image (e.g. CIFAR-10),
        # which the five pooling layers reduce to a 1x1 feature map
        self.fc = nn.Sequential(
            nn.Linear(512, 100),
            nn.ReLU(True),
            nn.Linear(100, 10)
        )

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten to (batch, features)
        x = self.fc(x)
        return x
VGG_Net = vgg()
print(VGG_Net)
Output:
vgg(
(feature): Sequential(
(0): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(1): Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(2): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(3): Sequential(
(0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(4): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
)
(fc): Sequential(
(0): Linear(in_features=512, out_features=100, bias=True)
(1): ReLU(inplace)
(2): Linear(in_features=100, out_features=10, bias=True)
)
)
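A minimal forward pass (a sketch, assuming 32x32 CIFAR-10-sized inputs, which is the size the 512-feature fc layer expects):
x = torch.zeros(4, 3, 32, 32)  # dummy batch of CIFAR-10-sized images
print(VGG_Net(x).shape)  # torch.Size([4, 10])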
This post has shown how to implement a VGG network in PyTorch, focusing on the definition and structure of the VGG block. By stacking 3x3 convolutions and 2x2 pooling layers, we built a network with 8 convolution layers and 5 max-pooling layers that progressively halves the spatial size of the input while increasing the channel count.