import torch
import torchvision  # torchvision: PyTorch's computer-vision library (datasets, transforms, models)
from torch.utils import data
from torchvision import transforms
from torch import nn
from IPython import display
from d2l import torch as d2l
def load_data_fashion_mnist(batch_size, resize=None):
""" 封装加载数据的函数 """
trans = [transforms.ToTensor()] # 将图片都转换成一个张量
if resize:
trans.insert(0, tranforms.Resize(resize))
trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="/Users/gwd777/Desktop/LMDeepLearning/09data",  # where to store the download
        train=True,       # request the training split
        transform=trans,  # yield tensors instead of PIL images
        download=True)    # download from the internet if not already cached
    mnist_test = torchvision.datasets.FashionMNIST(
        root="/Users/gwd777/Desktop/LMDeepLearning/09data",  # where to store the download
        train=False,      # request the test split; it takes no part in training and
                          # exists only to measure how well the model generalizes
        transform=trans,  # yield tensors instead of PIL images
        download=True)    # download from the internet if not already cached
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4))  # no need to shuffle the test set
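
# A minimal usage sketch (hypothetical values, just to illustrate the shapes):
#   train_iter, test_iter = load_data_fashion_mnist(256)
#   X, y = next(iter(train_iter))
#   print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])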
# Define the softmax operation
def softmax(X):
    X_exp = torch.exp(X)  # exponentiate element-wise; X is [256, 10]: one batch of 256
                          # samples, each with a raw score for every one of the 10 classes
    partition = X_exp.sum(1, keepdim=True)  # sum along dim 1, i.e. across each row;
                                            # keepdim=True keeps the result 2-D: [256, 1]
    # Broadcasting: [256, 10] / [256, 1] = [256, 10], a 10-class probability vector per row
    return X_exp / partition
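
# Quick sanity check (a sketch with made-up inputs): every row of the output
# should be non-negative and sum to 1.
#   X = torch.normal(0, 1, (2, 5))
#   X_prob = softmax(X)
#   print(X_prob, X_prob.sum(1))  # each row of X_prob sums to 1.0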
def net(X):
    k = W.shape[0]  # number of input features; here k = 784 (a plain Python int)
    X2 = X.reshape((-1, k))  # flatten each [1, 28, 28] image into a length-784 row vector
    # Multiply each sample by the weights: [256, 784] @ [784, 10] = [256, 10],
    # one raw 10-class score vector per sample; b ([10]) is broadcast onto every row.
    val = torch.matmul(X2, W) + b
    # Shapes: val=[256, 10], X2=[256, 784], W=[784, 10], b=[10], X=[256, 1, 28, 28]
    return softmax(val)
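
# Forward-pass sketch on random data (illustrative; W and b must already be defined):
#   X = torch.rand(4, 1, 28, 28)
#   print(net(X).shape)  # torch.Size([4, 10]); each row is a probability vector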
# Define the cross-entropy loss, given predictions y_hat and the true class labels y (integers 0-9)
def cross_entropy(y_hat, y):
    ry = range(len(y_hat))
    # Fancy indexing: ry supplies the row indices (0..255) and y the column indices, so t
    # picks out, for each of the 256 samples, the probability predicted for its true class.
    t = y_hat[ry, y]
    # By the definition of cross-entropy, the per-sample loss is -log of the
    # probability the model assigned to that sample's correct class.
    return -torch.log(t)
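
# A tiny worked example (values made up for illustration):
#   y = torch.tensor([0, 2])
#   y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
#   y_hat[[0, 1], y]         # tensor([0.1000, 0.5000]): true-class probability per row
#   cross_entropy(y_hat, y)  # tensor([2.3026, 0.6931])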
# y_hat holds the predicted classes; y holds the actual classes
def accuracy(y_hat, y):
    """Count the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # In each row of the 10-class probability matrix, take the index of the largest
        # entry (the class the model considers most likely) as the predicted class.
        y_hat = y_hat.argmax(axis=1)
    # y_hat's dtype may differ from y's, so cast before comparing;
    # the comparison yields a bool tensor.
    cmp = y_hat.type(y.dtype) == y
    t = cmp.type(y.dtype)  # cast bools to numbers: [False, False, True, ...] -> [0, 0, 1, ...]
    return float(t.sum())  # summing then counts the correct predictions
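
# Reusing the toy example above (illustrative): the argmax rows are [2, 2], so only
# the second sample is classified correctly.
#   accuracy(y_hat, y)           # 1.0
#   accuracy(y_hat, y) / len(y)  # 0.5, the accuracy rate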
# Evaluate the accuracy of an arbitrary model `net`
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the dataset produced by data_iter."""
    if isinstance(net, nn.Module):
        net.eval()  # switch the model to evaluation mode
    metric = Accumulator(2)  # a custom two-slot accumulator
    for X, y in data_iter:
        # accuracy(net(X), y) = correct predictions in this batch; y.numel() = batch size;
        # keep adding both into the accumulator
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]  # accuracy = correct predictions / total samples
class Accumulator:
""" 自定义累加器,在n个变量上进行累加 """
    def __init__(self, n):
        self.data = [0.0] * n
    def add(self, *args):
        # element-wise: add the i-th argument onto the i-th running sum
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
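
# Usage sketch: Accumulator(2) keeps two running sums in parallel, e.g.
#   metric = Accumulator(2)
#   metric.add(3, 256)     # 3 correct predictions out of 256 samples
#   metric.add(5, 256)
#   metric[0] / metric[1]  # 8 / 512, the running accuracy so far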
# Train for one epoch, i.e. one full pass over the training data
def train_epoch_ch3(net, train_iter, loss, updater):
    if isinstance(net, nn.Module):  # for an nn.Module, tell the framework to track gradients
        net.train()
    metric = Accumulator(3)  # three slots: total loss, correct predictions, sample count
    for X, y in train_iter:
        # Each batch holds batch_size images of 28*28*1 pixels
        y_hat = net(X)  # forward pass: [256, 10], one 10-class probability vector per sample
        l = loss(y_hat, y)  # compare the predicted probability vectors y_hat against y
        if isinstance(updater, torch.optim.Optimizer):  # a framework-provided optimizer
            updater.zero_grad()  # reset the gradients to zero
            l.backward()         # backpropagate
            updater.step()       # apply the parameter update
            metric.add(float(l) * len(y), accuracy(y_hat, y), y.numel())
        else:  # a hand-written updater
            l_sum = l.sum()      # sum the per-sample losses
            l_sum.backward()     # then backpropagate through the sum
            updater(X.shape[0])  # update once, scaled by the batch size
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # total loss / total samples, total correct / total samples
    return metric[0] / metric[2], metric[1] / metric[2]
class Animator:
"""在动画中绘制数据"""
def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
ylim=None, xscale='linear', yscale='linear',
fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
figsize=(3.5, 2.5)):
if legend is None:
legend = []
d2l.use_svg_display()
self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
if nrows * ncols == 1:
self.axes = [self.axes, ]
self.config_axes = lambda: d2l.set_axes(
self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
self.X, self.Y, self.fmts = None, None, fmts
def add(self, x, y):
if not hasattr(y, "__len__"):
y = [y]
n = len(y)
if not hasattr(x, "__len__"):
x = [x] * n
if not self.X:
self.X = [[] for _ in range(n)]
if not self.Y:
self.Y = [[] for _ in range(n)]
for i, (a, b) in enumerate(zip(x, y)):
if a is not None and b is not None:
self.X[i].append(a)
self.Y[i].append(b)
self.axes[0].cla()
for x, y, fmt in zip(self.X, self.Y, self.fmts):
self.axes[0].plot(x, y, fmt)
self.config_axes()
display.display(self.fig)
display.clear_output(wait=True)
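
# Animator usage sketch (meant for a notebook, since it relies on IPython display):
#   animator = Animator(xlabel='step', legend=['value'])
#   for i in range(5):
#       animator.add(i + 1, (i * 0.1,))  # redraws the figure after each point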
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model (as defined in Chapter 3)."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
# Use minibatch stochastic gradient descent to optimize the model's loss function
lr = 0.1
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
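
# For reference, d2l.sgd is essentially the following minibatch SGD step
# (a sketch of what the d2l helper does, not code imported from anywhere):
#   def sgd(params, lr, batch_size):
#       with torch.no_grad():
#           for param in params:
#               param -= lr * param.grad / batch_size  # average the gradient over the batch
#               param.grad.zero_()                     # clear it for the next iteration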
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = load_data_fashion_mnist(batch_size)
    num_inputs = 784   # each 28*28 input image is flattened into a length-784 vector
    num_outputs = 10   # Fashion-MNIST has 10 classes
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    num_epochs = 10
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)  # run the full training loop

# Question: why isn't the full loss computed as H(p, q) = -sum_i p_i * log(q_i) over all classes?
# Answer: it effectively is. The true label distribution p is one-hot (p_i = 1 for the correct
# class and 0 elsewhere), so every term of that sum vanishes except the one for the true class,
# and the whole expression collapses to -log(q_y), which is exactly what cross_entropy()
# extracts via y_hat[range(len(y_hat)), y].
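
# A numeric check of that collapse (values made up for illustration):
#   q = torch.tensor([0.1, 0.3, 0.6])  # predicted distribution
#   p = torch.tensor([0.0, 0.0, 1.0])  # one-hot truth, true class = 2
#   -(p * torch.log(q)).sum()          # tensor(0.5108)
#   -torch.log(q[2])                   # tensor(0.5108), identical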