import torch
import torchvision  # torchvision: PyTorch's computer-vision library (datasets, transforms, models)
from torch.utils import data
from torchvision import transforms
from torch import nn
from IPython import display
from d2l import torch as d2l
def load_data_fashion_mnist(batch_size, resize=None):
""" 封装加载数据的函数 """
trans = [transforms.ToTensor()] # 将图片都转换成一个张量
if resize:
trans.insert(0, tranforms.Resize(resize))
trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="/Users/gwd777/Desktop/LMDeepLearning/09data",  # where to store the download
        train=True,       # request the training split
        transform=trans,  # yield tensors instead of PIL images
        download=True)    # download from the internet if not already cached
    mnist_test = torchvision.datasets.FashionMNIST(
        root="/Users/gwd777/Desktop/LMDeepLearning/09data",  # where to store the download
        train=False,      # request the test split; it takes no part in training and
                          # exists only to measure how well the model generalizes
        transform=trans,  # yield tensors instead of PIL images
        download=True)    # download from the internet if not already cached
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4))  # no need to shuffle the test set
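
# A minimal usage sketch (hypothetical values, just to illustrate the shapes):
#   train_iter, test_iter = load_data_fashion_mnist(256)
#   X, y = next(iter(train_iter))
#   print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])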
# Define the softmax operation
def softmax(X):
    X_exp = torch.exp(X)  # exponentiate element-wise; X is [256, 10]: one batch of 256
                          # samples, each with a raw score for every one of the 10 classes
    partition = X_exp.sum(1, keepdim=True)  # sum along dim 1, i.e. across each row;
                                            # keepdim=True keeps the result 2-D: [256, 1]
    # Broadcasting: [256, 10] / [256, 1] = [256, 10], a 10-class probability vector per row
    return X_exp / partition
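
# Quick sanity check (a sketch with made-up inputs): every row of the output
# should be non-negative and sum to 1.
#   X = torch.normal(0, 1, (2, 5))
#   X_prob = softmax(X)
#   print(X_prob, X_prob.sum(1))  # each row of X_prob sums to 1.0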
def net(X):
    k = W.shape[0]  # number of input features; here k = 784 (a plain Python int)
    X2 = X.reshape((-1, k))  # flatten each [1, 28, 28] image into a length-784 row vector
    # Multiply each sample by the weights: [256, 784] @ [784, 10] = [256, 10],
    # one raw 10-class score vector per sample; b ([10]) is broadcast onto every row.
    val = torch.matmul(X2, W) + b
    # Shapes: val=[256, 10], X2=[256, 784], W=[784, 10], b=[10], X=[256, 1, 28, 28]
    return softmax(val)
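
# Forward-pass sketch on random data (illustrative; W and b must already be defined):
#   X = torch.rand(4, 1, 28, 28)
#   print(net(X).shape)  # torch.Size([4, 10]); each row is a probability vector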
# Define the cross-entropy loss, given predictions y_hat and the true class labels y (integers 0-9)
def cross_entropy(y_hat, y):
    ry = range(len(y_hat))
    # Fancy indexing: ry supplies the row indices (0..255) and y the column indices, so t
    # picks out, for each of the 256 samples, the probability predicted for its true class.
    t = y_hat[ry, y]
    # By the definition of cross-entropy, the per-sample loss is -log of the
    # probability the model assigned to that sample's correct class.
    return -torch.log(t)
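
# A tiny worked example (values made up for illustration):
#   y = torch.tensor([0, 2])
#   y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
#   y_hat[[0, 1], y]         # tensor([0.1000, 0.5000]): true-class probability per row
#   cross_entropy(y_hat, y)  # tensor([2.3026, 0.6931])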
# y_hat holds the predicted classes; y holds the actual classes
def accuracy(y_hat, y):
    """Count the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # In each row of the 10-class probability matrix, take the index of the largest
        # entry (the class the model considers most likely) as the predicted class.
        y_hat = y_hat.argmax(axis=1)
    # y_hat's dtype may differ from y's, so cast before comparing;
    # the comparison yields a bool tensor.
    cmp = y_hat.type(y.dtype) == y
    t = cmp.type(y.dtype)  # cast bools to numbers: [False, False, True, ...] -> [0, 0, 1, ...]
    return float(t.sum())  # summing then counts the correct predictions
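
# Reusing the toy example above (illustrative): the argmax rows are [2, 2], so only
# the second sample is classified correctly.
#   accuracy(y_hat, y)           # 1.0
#   accuracy(y_hat, y) / len(y)  # 0.5, the accuracy rate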
# Evaluate the accuracy of an arbitrary model `net`
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the dataset produced by data_iter."""
    if isinstance(net, nn.Module):
        net.eval()  # switch the model to evaluation mode
    metric = Accumulator(2)  # a custom two-slot accumulator
    for X, y in data_iter:
        # accuracy(net(X), y) = correct predictions in this batch; y.numel() = batch size;
        # keep adding both into the accumulator
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]  # accuracy = correct predictions / total samples
class Accumulator:
""" 自定义累加器,在n个变量上进行累加 """
    def __init__(self, n):
        self.data = [0.0] * n
    def add(self, *args):
        # element-wise: add the i-th argument onto the i-th running sum
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
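
# Usage sketch: Accumulator(2) keeps two running sums in parallel, e.g.
#   metric = Accumulator(2)
#   metric.add(3, 256)     # 3 correct predictions out of 256 samples
#   metric.add(5, 256)
#   metric[0] / metric[1]  # 8 / 512, the running accuracy so far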
# Train for one epoch, i.e. one full pass over the training data
def train_epoch_ch3(net, train_iter, loss, updater):
    if isinstance(net, nn.Module):  # for an nn.Module, tell the framework to track gradients
        net.train()
    metric = Accumulator(3)  # three slots: total loss, correct predictions, sample count
    for X, y in train_iter:
        # Each batch holds batch_size images of 28*28*1 pixels
        y_hat = net(X)  # forward pass: [256, 10], one 10-class probability vector per sample
        l = loss(y_hat, y)  # compare the predicted probability vectors y_hat against y
        if isinstance(updater, torch.optim.Optimizer):  # a framework-provided optimizer
            updater.zero_grad()  # reset the gradients to zero
            l.backward()         # backpropagate
            updater.step()       # apply the parameter update
            metric.add(float(l) * len(y), accuracy(y_hat, y), y.numel())
        else:  # a hand-written updater
            l_sum = l.sum()      # sum the per-sample losses
            l_sum.backward()     # then backpropagate through the sum
            updater(X.shape[0])  # update once, scaled by the batch size
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # total loss / total samples, total correct / total samples
    return metric[0] / metric[2], metric[1] / metric[2]
class Animator:
"""在动画中绘制数据"""
def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
ylim=None, xscale='linear', yscale='linear',
fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
figsize=(3.5, 2.5)):
if legend is None:
legend = []
d2l.use_svg_display()
self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
if nrows * ncols == 1:
self.axes = [self.axes, ]
self.config_axes = lambda: d2l.set_axes(
self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
self.X, self.Y, self.fmts = None, None, fmts
def add(self, x, y):
if not hasattr(y, "__len__"):
y = [y]
n = len(y)
if not hasattr(x, "__len__"):
x = [x] * n
if not self.X:
self.X = [[] for _ in range(n)]
if not self.Y:
self.Y = [[] for _ in range(n)]
for i, (a, b) in enumerate(zip(x, y)):
if a is not None and b is not None:
self.X[i].append(a)
self.Y[i].append(b)
self.axes[0].cla()
for x, y, fmt in zip(self.X, self.Y, self.fmts):
self.axes[0].plot(x, y, fmt)
self.config_axes()
display.display(self.fig)
display.clear_output(wait=True)
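
# Animator usage sketch (meant for a notebook, since it relies on IPython display):
#   animator = Animator(xlabel='step', legend=['value'])
#   for i in range(5):
#       animator.add(i + 1, (i * 0.1,))  # redraws the figure after each point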
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model (as defined in Chapter 3)."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
# Use minibatch stochastic gradient descent to optimize the model's loss function
lr = 0.1
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
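
# For reference, d2l.sgd is essentially the following minibatch SGD step
# (a sketch of what the d2l helper does, not code imported from anywhere):
#   def sgd(params, lr, batch_size):
#       with torch.no_grad():
#           for param in params:
#               param -= lr * param.grad / batch_size  # average the gradient over the batch
#               param.grad.zero_()                     # clear it for the next iteration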
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = load_data_fashion_mnist(batch_size)
    num_inputs = 784   # each 28*28 input image is flattened into a length-784 vector
    num_outputs = 10   # Fashion-MNIST has 10 classes
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    num_epochs = 10
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)  # run the full training loop

# Question: why isn't the full loss computed as H(p, q) = -sum_i p_i * log(q_i) over all classes?
# Answer: it effectively is. The true label distribution p is one-hot (p_i = 1 for the correct
# class and 0 elsewhere), so every term of that sum vanishes except the one for the true class,
# and the whole expression collapses to -log(q_y), which is exactly what cross_entropy()
# extracts via y_hat[range(len(y_hat)), y].
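
# A numeric check of that collapse (values made up for illustration):
#   q = torch.tensor([0.1, 0.3, 0.6])  # predicted distribution
#   p = torch.tensor([0.0, 0.0, 1.0])  # one-hot truth, true class = 2
#   -(p * torch.log(q)).sum()          # tensor(0.5108)
#   -torch.log(q[2])                   # tensor(0.5108), identical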