1. LeNet and AlexNet network architectures
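For reference, the sketch below gives a minimal LeNet-5-style definition in PyTorch (an illustrative sketch added here, not the original diagram), assuming a 1x28x28 Fashion-MNIST input; the AlexNet definition in section 3 can be read against it.

import torch
from torch import nn

# Minimal LeNet-5-style sketch, assuming a 1 x 28 x 28 input (e.g. Fashion-MNIST).
lenet = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),  # -> 6 x 28 x 28
    nn.AvgPool2d(kernel_size=2, stride=2),                    # -> 6 x 14 x 14
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),            # -> 16 x 10 x 10
    nn.AvgPool2d(kernel_size=2, stride=2),                    # -> 16 x 5 x 5
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))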

2. Improvements in AlexNet compared with LeNet
- Adds Dropout after the fully connected layers to mitigate overfitting
- Replaces the Sigmoid activation with ReLU, reducing the chance of vanishing gradients (see the gradient comparison sketch after this list)
- Changes the downsampling layers from average pooling (AvgPool) to max pooling (MaxPool)
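As a small illustration of the vanishing-gradient point, the sketch below (not part of the original code) compares the gradients of Sigmoid and ReLU at a large positive input; the values in the comments are approximate.

import torch

# Gradient of Sigmoid at x = 8: sigmoid(8) is close to 1, so its derivative is nearly 0.
x = torch.tensor([8.0], requires_grad=True)
torch.sigmoid(x).backward()
print(x.grad)  # roughly 3e-4 -- the gradient has almost vanished

# Gradient of ReLU at x = 8: the positive region passes gradients through unchanged.
x = torch.tensor([8.0], requires_grad=True)
torch.relu(x).backward()
print(x.grad)  # 1.0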
3. Model layer design
net = nn.Sequential(
    # Input: 1 x 224 x 224
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),  # -> 96 x 54 x 54
    nn.MaxPool2d(kernel_size=3, stride=2),                             # -> 96 x 26 x 26
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),           # -> 256 x 26 x 26
    nn.MaxPool2d(kernel_size=3, stride=2),                             # -> 256 x 12 x 12
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),          # -> 384 x 12 x 12
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),          # -> 384 x 12 x 12
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),          # -> 256 x 12 x 12
    nn.MaxPool2d(kernel_size=3, stride=2),                             # -> 256 x 5 x 5
    nn.Flatten(),                                                      # -> 6400
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 10))                                               # 10 Fashion-MNIST classes
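To confirm where the 6400 in nn.Linear(6400, 4096) comes from, a quick sanity check is to push a dummy 224x224 input through net layer by layer and print each output shape (assumes torch is imported and net is defined as above):

import torch

X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
# The final MaxPool2d outputs (1, 256, 5, 5), so Flatten yields 256 * 5 * 5 = 6400 features.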
4. Complete code
import torch
from torch import nn
from torch.utils import data
from torchvision import datasets, transforms

# Hyperparameters
batch_size = 128
lr = 0.01
resize = 224  # AlexNet expects 224 x 224 inputs, so upsample Fashion-MNIST's 28 x 28 images

# Resize, then convert images to tensors.
trans = transforms.Compose([transforms.Resize(resize), transforms.ToTensor()])

# Fashion-MNIST training and test sets with their data loaders.
train_data = datasets.FashionMNIST(root='./train_Fashiondata', train=True, transform=trans, download=True)
test_data = datasets.FashionMNIST(root='./test_Fashiondata', train=False, transform=trans, download=True)
train_iter = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_iter = data.DataLoader(test_data, batch_size=batch_size, shuffle=False)
# AlexNet (same definition as in section 3)
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 10))
def init_weights(m):
    # Xavier-initialize the weights of convolutional and fully connected layers.
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        nn.init.xavier_uniform_(m.weight)

net.apply(init_weights)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

optimizer = torch.optim.SGD(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()
def train(epoch):
    net.train()  # enable Dropout during training
    running_loss = 0.0
    for i, (X, y) in enumerate(train_iter):
        X, y = X.to(device), y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        running_loss += l.item()
        if i % 100 == 99:  # report the average loss every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
def test():
    net.eval()  # disable Dropout for evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_iter:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print("Accuracy: %d %%" % (100 * correct / total))
for epoch in range(20):
    train(epoch)
    test()
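Once training finishes, the weights can be saved and the network applied to a batch of test images. A minimal sketch (the checkpoint file name alexnet_fashionmnist.pt is arbitrary):

# Save the trained weights (file name is arbitrary).
torch.save(net.state_dict(), 'alexnet_fashionmnist.pt')

# Predict labels for one batch of test images.
net.eval()
with torch.no_grad():
    images, labels = next(iter(test_iter))
    preds = net(images.to(device)).argmax(dim=1)
print('predicted:', preds[:10].cpu().tolist())
print('actual:   ', labels[:10].tolist())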