3.6. Implementing Softmax Regression from Scratch (Dive into Deep Learning)
Code study notes
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
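A quick sanity check on the loader (my own addition, not from the book): each batch should hold 256 single-channel 28x28 images plus their integer labels.
# Peek at one batch; the d2l loader wraps standard PyTorch DataLoaders
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # expected: torch.Size([256, 1, 28, 28]) torch.Size([256])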
3.6.1 Initializing Model Parameters
num_inputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
3.6.2 Defining the Softmax Operation
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
# Sum along dim 0 (down columns) and dim 1 (across rows);
# along dim 1: 6 = 1+2+3 and 15 = 4+5+6
X.sum(0, keepdim=True), X.sum(1, keepdim=True)
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    # Broadcasting: divide each entry by its row total to get probabilities;
    # each row then sums to 1
    return X_exp / partition
X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(1)
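One caveat worth noting: this naive softmax overflows when entries of X are large (exp(100) is already inf in float32). A common stabilization, sketched below as an aside rather than the book's code at this point, subtracts each row's max first, which leaves the result mathematically unchanged.
def stable_softmax(X):
    # Subtracting the row max shifts the exponents into a safe range;
    # softmax is invariant to adding a constant to every entry of a row
    X_shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X_shifted)
    return X_exp / X_exp.sum(1, keepdim=True)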
3.6.3 Defining the Model
def net(X):
    # Flatten each image into a length-784 row vector before the linear layer
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
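To see the reshape at work (a hypothetical check, not from the notes): a batch of raw images enters as (batch, 1, 28, 28), is flattened to (batch, 784), and comes out as one probability row per image.
X_demo = torch.zeros((4, 1, 28, 28))  # a fake batch of 4 images
print(net(X_demo).shape)              # torch.Size([4, 10])
print(net(X_demo).sum(1))             # each row sums to 1 (up to float error)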
3.6.4 Defining the Loss Function
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
# Row indices [0, 1] pair with the labels: y[0] -> 0, y[1] -> 2,
# so this picks y_hat[0, 0] -> 0.1 and y_hat[1, 2] -> 0.5
y_hat[[0, 1], y]
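The same selection can also be written with torch.gather, which may read more explicitly for large batches (equivalent up to a trailing dimension of size 1):
# Gather the probability of the true class for each row; result shape (2, 1)
y_hat.gather(1, y.reshape(-1, 1))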
def cross_entropy(y_hat, y):
    # With y_hat = [[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]], len(y_hat) = 2,
    # so this computes -log(y_hat[0, 0]) and -log(y_hat[1, 2])
    return -torch.log(y_hat[range(len(y_hat)), y])
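Calling it on the toy example gives the per-example losses: -log(0.1) ≈ 2.3026 for the first row and -log(0.5) ≈ 0.6931 for the second.
cross_entropy(y_hat, y)  # expected: tensor([2.3026, 0.6931])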