MLP多层感知机+动手学深度学习+（构建RELU+从零构建MLP结构+torch包构建MLP）

本文链接：https://blog.youkuaiyun.com/qq_47698599/article/details/126373976

多层感知机MLP

MLP就是由全连接层构成的神经网络：每个隐藏层先计算X@W+b，再经过激活函数，其输出作为下一层的输入；输出层只计算X@W+b，不使用激活函数。

RELU的构建

def relu_(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` whose entries are the element-wise
        maximum of ``x`` and zero.
    """
    zeros = torch.zeros_like(x)
    # Element-wise maximum against an all-zero tensor of the same shape.
    return torch.maximum(zeros, x)

MLP的从零构建

# Layer widths: 28*28 input features, 256 hidden units, 10 outputs.
inputSize = 28 * 28
hideSize = 256
outputSize = 10

# Weights are drawn from a normal distribution (mean 0, std 0.01); biases
# start at zero. torch.nn.Parameter marks each tensor as trainable.
W1 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=[inputSize, hideSize])
)
B1 = torch.nn.Parameter(torch.zeros(hideSize))
W2 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=[hideSize, outputSize])
)
B2 = torch.nn.Parameter(torch.zeros(outputSize))
def mlpmodel(x):
    """Run the hand-written multilayer perceptron on a batch.

    Args:
        x: Input tensor; it is reshaped to ``(-1, inputSize)`` before use.

    Returns:
        The result of the last affine layer, with no activation applied.
    """
    weights = (W1, W2)
    biases = (B1, B2)
    # Every (weight, bias) pair except the last one is a hidden layer.
    *hidden_layers, (out_w, out_b) = zip(weights, biases)

    activations = torch.reshape(x, (-1, inputSize))
    for layer_w, layer_b in hidden_layers:
        activations = relu_(activations @ layer_w + layer_b)
    # The output layer does not use an activation function.
    return activations @ out_w + out_b

MLP从零构建的训练

# Load the Fashion-MNIST iterators with a batch size of 1000.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=1000)

# Mean-reduced cross-entropy loss and plain SGD over the four hand-made
# parameters.
# NOTE(review): some d2l releases end train_ch3 with asserts on the final
# loss/accuracy -- confirm these hyper-parameters satisfy them.
scratch_loss = torch.nn.CrossEntropyLoss(reduction='mean')
scratch_sgd = torch.optim.SGD(params=[W1, B1, W2, B2], lr=0.01)
d2l.train_ch3(
    mlpmodel,
    train_iter,
    test_iter,
    scratch_loss,
    num_epochs=10,
    updater=scratch_sgd,
)

MLP的模块训练

def initialze_linear(layer):
    """Re-initialise the weight of a ``torch.nn.Linear`` module in place.

    Modules whose exact type is not ``torch.nn.Linear`` are left untouched;
    the bias is never modified.

    Args:
        layer: The module to (possibly) initialise.
    """
    if type(layer) is not torch.nn.Linear:
        return
    # Linear-layer initialisation: normal distribution with std 0.01
    # (mean left at the function's default).
    torch.nn.init.normal_(layer.weight, std=0.01)

# Build the model: flatten, hidden linear layer, ReLU, output linear layer.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(inputSize, hideSize),
    torch.nn.ReLU(),
    torch.nn.Linear(hideSize, outputSize),
)
# Module.apply calls the initialiser on every sub-module.
model.apply(initialze_linear)

# Mean-reduced cross-entropy loss and plain SGD over the model's parameters.
module_loss = torch.nn.CrossEntropyLoss(reduction='mean')
module_sgd = torch.optim.SGD(params=model.parameters(), lr=0.01)
d2l.train_ch3(
    model,
    train_iter,
    test_iter,
    module_loss,
    num_epochs=10,
    updater=module_sgd,
)

MLP整体代码

import torch
from d2l import torch as d2l

def relu_(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` whose entries are the element-wise
        maximum of ``x`` and zero.
    """
    zeros = torch.zeros_like(x)
    # Element-wise maximum against an all-zero tensor of the same shape.
    return torch.maximum(zeros, x)

def mlpmodel(x):
    """Run the hand-written multilayer perceptron on a batch.

    Args:
        x: Input tensor; it is reshaped to ``(-1, inputSize)`` before use.

    Returns:
        The result of the last affine layer, with no activation applied.
    """
    weights = (W1, W2)
    biases = (B1, B2)
    # Every (weight, bias) pair except the last one is a hidden layer.
    *hidden_layers, (out_w, out_b) = zip(weights, biases)

    activations = torch.reshape(x, (-1, inputSize))
    for layer_w, layer_b in hidden_layers:
        activations = relu_(activations @ layer_w + layer_b)
    # The output layer does not use an activation function.
    return activations @ out_w + out_b
    
def initialze_linear(layer):
    """Re-initialise the weight of a ``torch.nn.Linear`` module in place.

    Modules whose exact type is not ``torch.nn.Linear`` are left untouched;
    the bias is never modified.

    Args:
        layer: The module to (possibly) initialise.
    """
    if type(layer) is not torch.nn.Linear:
        return
    # Normal distribution with std 0.01 (mean left at the function's default).
    torch.nn.init.normal_(layer.weight, std=0.01)
      
# Layer widths: 28*28 input features, 256 hidden units, 10 outputs.
inputSize = 28 * 28
hideSize = 256
outputSize = 10

# Weights are drawn from a normal distribution (mean 0, std 0.01); biases
# start at zero. torch.nn.Parameter marks each tensor as trainable.
W1 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=[inputSize, hideSize])
)
B1 = torch.nn.Parameter(torch.zeros(hideSize))
W2 = torch.nn.Parameter(
    torch.normal(mean=0, std=0.01, size=[hideSize, outputSize])
)
B2 = torch.nn.Parameter(torch.zeros(outputSize))
if __name__ == '__main__':
    # Load the Fashion-MNIST iterators with a batch size of 1000; both
    # training runs below share them.
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=1000)

    # Hand-written MLP: mean cross-entropy loss, SGD over the raw parameters.
    # NOTE(review): some d2l releases end train_ch3 with asserts on the final
    # loss/accuracy -- confirm these hyper-parameters satisfy them.
    scratch_loss = torch.nn.CrossEntropyLoss(reduction='mean')
    scratch_sgd = torch.optim.SGD(params=[W1, B1, W2, B2], lr=0.01)
    d2l.train_ch3(
        mlpmodel,
        train_iter,
        test_iter,
        scratch_loss,
        num_epochs=10,
        updater=scratch_sgd,
    )

    # Same architecture assembled from torch.nn building blocks.
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(inputSize, hideSize),
        torch.nn.ReLU(),
        torch.nn.Linear(hideSize, outputSize),
    )
    # Module.apply calls the initialiser on every sub-module.
    model.apply(initialze_linear)
    module_loss = torch.nn.CrossEntropyLoss(reduction='mean')
    module_sgd = torch.optim.SGD(params=model.parameters(), lr=0.01)
    d2l.train_ch3(
        model,
        train_iter,
        test_iter,
        module_loss,
        num_epochs=10,
        updater=module_sgd,
    )