PyTorch: The VGG and GoogLeNet Networks
In earlier posts we implemented the LeNet and AlexNet architectures, with AlexNet making a number of modifications to LeNet. In this post we introduce the ideas behind, and the implementation of, two more convolutional networks.
1 The VGG Network
1.1 Architecture Review
VGG is again essentially a stack of convolutional layers. A VGG block consists of several consecutive 3×3 convolutions with padding=1, followed by a single max-pooling layer with kernel_size=2 and stride=2. The convolutional layers leave the spatial size of the input unchanged, while the pooling layer halves it.
For a given receptive field, stacking several small kernels works better than using one large kernel: the extra convolutional layers make the network deeper, which lets it learn more complex patterns, and at the same time they use fewer parameters. VGG replaces a 7×7 kernel with three stacked 3×3 kernels and a 5×5 kernel with two 3×3 kernels, so the network becomes deeper while keeping the same receptive field. For example, with C input and output channels, three 3×3 convolutions need 3·(3·3·C·C) = 27C² weights, whereas a single 7×7 convolution needs 7·7·C·C = 49C².
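The following minimal sketch verifies this parameter count with nn.Conv2d (the channel count of 256 is arbitrary and chosen only for illustration; biases are disabled so that only the kernel weights are counted):

import torch.nn as nn

C = 256  # arbitrary channel count, for illustration only
stack_3x3 = nn.Sequential(*[nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False) for _ in range(3)])
single_7x7 = nn.Conv2d(C, C, kernel_size=7, padding=3, bias=False)

def count(m):
    # total number of weights in a module
    return sum(p.numel() for p in m.parameters())

print(count(stack_3x3))   # 27 * 256**2 = 1769472
print(count(single_7x7))  # 49 * 256**2 = 3211264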
1.2 Implementation
#encoding=utf-8
import time
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# A layer that flattens the feature map into a vector of shape (batch, C*H*W)
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)
# Build one VGG block: num_convs 3x3 convolutions followed by a 2x2 max-pooling layer
def vgg_block(num_convs, in_channels, out_channels):
    vgg_blocks = []
    for i in range(num_convs):
        if i == 0:
            vgg_blocks.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        else:
            vgg_blocks.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        vgg_blocks.append(nn.ReLU())
    vgg_blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
    # * unpacks the list into positional arguments
    return nn.Sequential(*vgg_blocks)
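# A quick shape check of a single block (a sketch, not part of the network below; the
# channel counts 3 and 16 and the 64x64 input are arbitrary). Uncomment to verify that
# the convolutions preserve the spatial size and the pooling layer halves it:
# blk = vgg_block(2, 3, 16)
# print(blk(torch.rand(1, 3, 64, 64)).shape)  # torch.Size([1, 16, 32, 32])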
# Now build the full VGG network
# The network consists of 5 VGG blocks
# Blocks 1 and 2 each contain a single convolution, with (in, out) channels (1, 64) and (64, 128)
# Blocks 3, 4 and 5 each contain two convolutions, with channels (128, 256), (256, 512) and (512, 512)
conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))
# After five halvings a 224x224 input becomes 7x7 with 512 channels
input_features = 512 * 7 * 7
hidden_features = 4096
def vgg(conv_arch, input_features, hidden_features=4096):
    net = nn.Sequential()
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module('vgg_block_' + str(i), vgg_block(num_convs, in_channels, out_channels))
    # Classifier head: flatten, two fully connected layers with dropout, then a 10-way output
    net.add_module('fc', nn.Sequential(FlattenLayer(),
                                       nn.Linear(input_features, hidden_features),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(hidden_features, hidden_features),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(hidden_features, 10)))
    return net
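# Optional sanity check (a sketch; uncomment to run, a full forward pass on CPU is slow):
# feed a single 1x224x224 image through the blocks and watch the spatial size halve
# from 224 -> 112 -> 56 -> 28 -> 14 -> 7 while the channels grow to 512.
# check_net = vgg(conv_arch, input_features, hidden_features)
# X = torch.rand(1, 1, 224, 224)
# for name, blk in check_net.named_children():
#     X = blk(X)
#     print(name, 'output shape:', X.shape)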
# Data loading for Fashion-MNIST
def load_data(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True,
                                                    transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True,
                                                   transform=transform)
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)
    return train_iter, test_iter
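# Note: each batch drawn from train_iter is a pair (X, y); with the resize=224 used below,
# X has shape (batch_size, 1, 224, 224) and y has shape (batch_size,).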
# Evaluate classification accuracy over a data iterator
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # Use the device the model parameters live on
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode (disables dropout)
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:
                # net is a plain function; check whether it takes an is_training argument
                if 'is_training' in net.__code__.co_varnames:
                    # set is_training to False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
# Training loop
def train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            loss_value = loss(y_hat, y)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            train_loss_sum += loss_value.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec"
              % (epoch + 1, train_loss_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
# Instantiate the network
net = vgg(conv_arch, input_features, hidden_features)
# Load the dataset; images are resized to 224x224 to match the VGG input size
batch_size = 1
train_iter, test_iter = load_data(batch_size, resize=224)
# Hyperparameters
lr = 0.001
num_epochs = 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
if __name__ == '__main__':
    train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
2 References
- Dive into Deep Learning (动手学深度学习), PyTorch edition