import torch
from torch import nn
from d2l import torch as d2l
# Multilayer perceptron: nn.Sequential is a container that chains the given
# layers and applies them in the order they are listed.
net = nn.Sequential(
    nn.Flatten(),         # collapse every non-batch dimension into one vector
    nn.Linear(784, 256),  # hidden layer: 784 input features -> 256 units
    nn.ReLU(),
    nn.Linear(256, 10),   # output layer: 256 units -> 10 outputs
)
def init_weights(m):
    """Re-initialize the weight of a linear layer from N(0, 0.01**2).

    Written to be passed to ``nn.Module.apply``, which calls it once per
    submodule. Modules that are not ``nn.Linear`` are left untouched, and
    only ``weight`` is modified (the bias is not reset).

    Args:
        m: The module to (possibly) initialize.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # nn.Linear exposes its weight matrix as the ``weight`` attribute;
        # normal_ fills it in place.
        nn.init.normal_(m.weight, std=0.01)
# Run the initializer over every submodule of the network.
net.apply(init_weights)

# Hyperparameters: mini-batch size, learning rate, number of epochs.
batch_size = 256
lr = 0.1
num_epochs = 10

# reduction='none' leaves the loss unaggregated: one value per sample
# instead of a single mean/sum over the batch.
loss = nn.CrossEntropyLoss(reduction='none')

# net.parameters() hands the optimizer the learnable parameters (weights
# and biases) of every layer.
trainer = torch.optim.SGD(params=net.parameters(), lr=lr)

# Build the Fashion-MNIST train/test iterators and run the d2l training loop.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)