多层感知机MLP
MLP 是由全连接层构成的神经网络:每一层先计算 X@W+b,隐藏层再经过激活函数(如 ReLU),其输出作为下一层的输入;输出层不使用激活函数。
ReLU的构建
def relu_(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` holding ``max(0, x)`` for every element.
    """
    # Compare against an all-zero tensor of the same shape/dtype as the input.
    floor = torch.zeros_like(x)
    return torch.maximum(floor, x)
MLP的从零构建
# Layer widths: flattened 28x28 input, one hidden layer, 10 output scores.
inputSize = 28 * 28
hideSize = 256
outputSize = 10

# Weights are drawn from N(0, 0.01^2); biases start at zero.
# torch.nn.Parameter already marks its tensor as requiring gradients,
# so the inner tensors do not need requires_grad themselves.
W1 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=(inputSize, hideSize))
)
B1 = torch.nn.Parameter(torch.zeros(hideSize))
W2 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=(hideSize, outputSize))
)
B2 = torch.nn.Parameter(torch.zeros(outputSize))
def mlpmodel(x):
    """Forward pass of the hand-written MLP.

    Uses the module-level parameters ``W1``/``B1`` (hidden layer) and
    ``W2``/``B2`` (output layer).

    Args:
        x: Input batch; it is reshaped to rows of ``inputSize`` values.

    Returns:
        The output-layer scores, with no activation applied to them.
    """
    weights = (W1, W2)
    biases = (B1, B2)
    hidden = torch.reshape(x, (-1, inputSize))
    # Every layer except the last is an affine map followed by ReLU.
    for weight, bias in zip(weights[:-1], biases[:-1]):
        hidden = relu_(hidden @ weight + bias)
    # The output layer is affine only.
    return hidden @ weights[-1] + biases[-1]
MLP从零构建的训练
# Load the Fashion-MNIST train/test iterators with 1000 samples per batch.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=1000)

# Mean-reduced cross-entropy loss and plain SGD over the hand-made parameters.
loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.SGD(params=[W1, B1, W2, B2], lr=0.01)

# Train the from-scratch model for 10 epochs.
# NOTE(review): some d2l releases assert on the final loss/accuracy inside
# train_ch3; confirm lr=0.01 with batch_size=1000 converges far enough.
d2l.train_ch3(mlpmodel, train_iter, test_iter, loss_fn,
              num_epochs=10, updater=optimizer)
使用torch.nn模块构建并训练MLP
def initialze_linear(layer):
    """Re-initialise the weight of a linear layer from N(0, 0.01^2).

    Meant to be passed to ``torch.nn.Module.apply``, which calls it on every
    sub-module. Modules that are not linear layers are left untouched, and
    only ``layer.weight`` is modified (the bias is not changed here).

    Args:
        layer: Any ``torch.nn.Module``.
    """
    # isinstance instead of an exact type comparison, so subclasses of
    # torch.nn.Linear are initialised as well.
    if isinstance(layer, torch.nn.Linear):
        torch.nn.init.normal_(layer.weight, std=0.01)
# Same architecture built from torch.nn modules:
# flatten -> linear -> ReLU -> linear.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(inputSize, hideSize),
    torch.nn.ReLU(),
    torch.nn.Linear(hideSize, outputSize),
)
# Run the initialiser on every sub-module of the model.
model.apply(initialze_linear)

# Mean-reduced cross-entropy loss and plain SGD over the model's parameters.
loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

# Train the module-based model for 10 epochs.
d2l.train_ch3(model, train_iter, test_iter, loss_fn,
              num_epochs=10, updater=optimizer)
MLP整体代码
import torch
from d2l import torch as d2l
def relu_(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` holding ``max(0, x)`` for every element.
    """
    # Compare against an all-zero tensor of the same shape/dtype as the input.
    floor = torch.zeros_like(x)
    return torch.maximum(floor, x)
def mlpmodel(x):
    """Forward pass of the hand-written MLP.

    Uses the module-level parameters ``W1``/``B1`` (hidden layer) and
    ``W2``/``B2`` (output layer).

    Args:
        x: Input batch; it is reshaped to rows of ``inputSize`` values.

    Returns:
        The output-layer scores, with no activation applied to them.
    """
    weights = (W1, W2)
    biases = (B1, B2)
    hidden = torch.reshape(x, (-1, inputSize))
    # Every layer except the last is an affine map followed by ReLU.
    for weight, bias in zip(weights[:-1], biases[:-1]):
        hidden = relu_(hidden @ weight + bias)
    # The output layer is affine only.
    return hidden @ weights[-1] + biases[-1]
def initialze_linear(layer):
    """Re-initialise the weight of a linear layer from N(0, 0.01^2).

    Meant to be passed to ``torch.nn.Module.apply``, which calls it on every
    sub-module. Modules that are not linear layers are left untouched, and
    only ``layer.weight`` is modified (the bias is not changed here).

    Args:
        layer: Any ``torch.nn.Module``.
    """
    # isinstance instead of an exact type comparison, so subclasses of
    # torch.nn.Linear are initialised as well.
    if isinstance(layer, torch.nn.Linear):
        torch.nn.init.normal_(layer.weight, std=0.01)
# Layer widths: flattened 28x28 input, one hidden layer, 10 output scores.
inputSize = 28 * 28
hideSize = 256
outputSize = 10

# Weights are drawn from N(0, 0.01^2); biases start at zero.
# torch.nn.Parameter already marks its tensor as requiring gradients,
# so the inner tensors do not need requires_grad themselves.
W1 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=(inputSize, hideSize))
)
B1 = torch.nn.Parameter(torch.zeros(hideSize))
W2 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=(hideSize, outputSize))
)
B2 = torch.nn.Parameter(torch.zeros(outputSize))
if __name__ == '__main__':
    # Load the Fashion-MNIST train/test iterators with 1000 samples per batch.
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=1000)

    # Both runs use a mean-reduced cross-entropy loss.
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')

    # Hand-written MLP: plain SGD over the four module-level parameters.
    scratch_optimizer = torch.optim.SGD(params=[W1, B1, W2, B2], lr=0.01)
    d2l.train_ch3(mlpmodel, train_iter, test_iter, loss_fn,
                  num_epochs=10, updater=scratch_optimizer)

    # Same architecture built from torch.nn modules:
    # flatten -> linear -> ReLU -> linear.
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(inputSize, hideSize),
        torch.nn.ReLU(),
        torch.nn.Linear(hideSize, outputSize),
    )
    # Run the initialiser on every sub-module of the model.
    model.apply(initialze_linear)

    module_optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
    d2l.train_ch3(model, train_iter, test_iter, loss_fn,
                  num_epochs=10, updater=module_optimizer)