丢弃法
丢弃法(dropout)本质上是一种正则化方法,可用于缓解过拟合问题
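丢弃法以概率 $p$ 将每个输入 $x_i$ 置零,否则将其放大为原来的 $1/(1-p)$ 倍:
$$x_i' = \begin{cases} 0, & \text{以概率 } p \\ \dfrac{x_i}{1-p}, & \text{以概率 } 1-p \end{cases}$$
其中 $x_i'$ 是对每个输入 $x_i$ 处理后得到的值。经过这样的处理后,$x_i'$ 的期望与 $x_i$ 相等:$E[x_i'] = p \cdot 0 + (1-p) \cdot \dfrac{x_i}{1-p} = x_i$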
在神经网络中,dropout层的实现如下:
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Each element of ``X`` is zeroed independently with probability
    ``dropout``; the surviving elements are divided by ``1 - dropout`` so
    that the expected value of every output element equals its input.

    Args:
        X: Input tensor.
        dropout: Drop probability, in the closed interval [0, 1].

    Returns:
        A tensor with the same shape as ``X``. All zeros when
        ``dropout == 1``; ``X`` itself (not a copy) when ``dropout == 0``.

    Raises:
        AssertionError: If ``dropout`` is outside [0, 1].
    """
    assert 0 <= dropout <= 1, "dropout must be in [0, 1]"
    # In this case every element is dropped.
    if dropout == 1:
        return torch.zeros_like(X)
    # In this case every element is kept.
    if dropout == 0:
        return X
    # Sample the mask on the same device as X: torch.rand without a device
    # argument allocates on the default device, and multiplying that with a
    # tensor living on another device raises a device-mismatch error.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    # Rescale the survivors so the expectation of the output matches X.
    return mask * X / (1.0 - dropout)
实验代码
import torch
from torch.nn.modules.flatten import Flatten
from torch.nn.modules.loss import CrossEntropyLoss
import torchvision
from torchvision import transforms
from torch.utils import data
from torch import nn
import plotly.graph_objects as go
import plotly.offline as of
# Load the data.
DATA_ROOT = 'D:\\PZL\\vscode\\python'  # directory the dataset files are stored in
BATCH_SIZE = 256

trains = transforms.ToTensor()  # transform object that converts each image to a tensor
# download=True asks torchvision to fetch Fashion-MNIST into DATA_ROOT.
train_data = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=True, transform=trains, download=True
)
test_data = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=False, transform=trains, download=True
)

# Mini-batch readers. Only the training data needs shuffling; the order of
# test samples does not matter, so the test loader does not shuffle.
train_iter = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_iter = data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# Define the network: two hidden layers, each followed by a dropout layer.
dropout1 = 0.2  # drop probability p of the first dropout layer
dropout2 = 0.5  # drop probability p of the second dropout layer
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(dropout1),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Dropout(dropout2),
    nn.Linear(256, 10),
)
# Initialize the network parameters.
def init_weights(m):
    """Initialize the weights of a linear layer from a normal distribution.

    Intended to be passed to ``Module.apply``, which calls it once for every
    sub-module of the network.

    Args:
        m: A layer (sub-module) of the network. Layers that are not
            ``nn.Linear`` (or a subclass of it) are left untouched.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Mean defaults to 0; note that 0.01 is the standard deviation, not
        # the variance. The bias is not modified.
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)  # call init_weights on every layer of the network

# Loss function: cross-entropy. nn.CrossEntropyLoss takes raw, unnormalized
# scores (logits) and applies log-softmax itself, so the network output must
# NOT be passed through softmax first.
loss = nn.CrossEntropyLoss()

# Optimization algorithm: SGD.
trainer = torch.optim.SGD(net.parameters(), lr=0.1)

# Training.
num_epochs = 10
e = list(range(num_epochs))  # x-axis values for the plot
loss_test = []  # test-set loss after each epoch
for epoch in range(num_epochs):
    # Training mode: dropout layers are active. Must be re-enabled every
    # epoch because evaluation below switches the network to eval mode.
    net.train()
    # X is a mini-batch of images, y the matching class labels. The names
    # deliberately avoid `data`, which is the imported torch.utils.data module.
    for X, y in train_iter:
        batch_loss = loss(net(X), y)
        trainer.zero_grad()
        batch_loss.backward()
        trainer.step()

    # Evaluation mode: dropout layers are disabled.
    net.eval()
    total_loss = 0.0
    num_samples = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for X, y in test_iter:
            batch_size = y.shape[0]
            # The loss is a per-batch mean; weight it by the batch size so
            # the result is the mean over the whole test set.
            total_loss += loss(net(X), y).item() * batch_size
            num_samples += batch_size
    loss_test.append(total_loss / num_samples)

# Visualize the test-set loss after each epoch.
line = go.Scatter(x=e, y=loss_test, name='test')
fig = go.Figure(line)
fig.update_layout(
    title=f'dropout1={dropout1}, dropout2={dropout2}',
    xaxis_title='epoch',
    yaxis_title='loss',
)
of.plot(fig)
dropout参数是个超参数,代表丢弃神经元的概率!
当dropout=1时,全部神经元都被丢弃,即每个输入x均置为0
当dropout=0时,全部神经元都不会被丢弃,即该dropout层不起作用