1. 从零实现
0. 导入需要的包
#000 导包
import torch
print(torch.__version__)
import torchvision
print(torchvision.__version__)
import torchvision.transforms as transforms
import d2l.torch as d2l
import numpy as np
import os
print("当前工作目录:", os.getcwd())
import sys
import torchtext
#------------运行结果---------------
# 1.12.0
# 0.13.0
1.数据的下载及预处理
# 1. Download and preprocess the dataset
# 1.1 Fetch the Fashion-MNIST train and test splits (downloaded into `root`
#     when missing); every sample goes through the ToTensor transform.
mnist_train = torchvision.datasets.FashionMNIST(
    root='/Users/w/PycharmProjects/DeepLearning_with_LiMu/datasets/FashionMnist',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='/Users/w/PycharmProjects/DeepLearning_with_LiMu/datasets/FashionMnist',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# 1.2 Wrap both splits in mini-batch loaders
batch_size = 256
# No loader worker processes on Windows, four on every other platform.
num_worker = 0 if sys.platform.startswith('win') else 4
# Only the training loader shuffles; the test loader keeps dataset order.
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_worker
)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_worker
)
# 1.3 Inspect the data
# Expected output: <class 'torchvision.datasets.mnist.FashionMNIST'>
print(type(mnist_train))
# Expected output: 60000 10000 (60,000 training samples, 10,000 test samples)
print(len(mnist_train), len(mnist_test))

# Plot the first ten raw training samples for a quick visual check
from CommonFunctionModule import data_iter
from CommonFunctionModule import get_fashion_mnist_label
from CommonFunctionModule import show_fashion_mnist

X, y = [], []
for i in range(10):
    image, target = mnist_train[i]
    X.append(image)
    y.append(target)
show_fashion_mnist(X, get_fashion_mnist_label(y))

# Show the shape of one sample before and after reshaping it into rows of
# 28 * 28 values.
feature, label = mnist_train[0]
print('feature.size() = ' + str(feature.size()) + '\n')
feature_trans = feature.reshape(-1, 28 * 28)
print('feature_trans.size()' + str(feature_trans.size()) + '\n')
<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000
2.定义模型
# 2. Define the model
## 2.1 Parameter initialisation
# Number of input units: 28 * 28 = 784
num_input = 28 * 28
# Number of output units
num_output = 10
# Shapes: sample matrix n x 784, weight matrix 784 x 10, so X·w is n x 10.
# Weights are sampled from a normal distribution (mean 0, std 0.01) with NumPy
# and converted to a float32 tensor that tracks gradients.
w = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_input, num_output)),
    dtype=torch.float32,
    requires_grad=True,
)
# The bias starts at zero and also tracks gradients.
b = torch.zeros(num_output, dtype=torch.float32, requires_grad=True)
def net(X):
    """Run the softmax-regression forward pass on a batch.

    ``X`` is reshaped to rows of ``num_input`` values, multiplied by the
    module-level weight matrix ``w``, offset by the bias ``b``, and passed
    through ``softmax``.

    Returns:
        The tensor produced by ``softmax`` for the batch.
    """
    flattened = X.view((-1, num_input))
    logits = torch.mm(flattened, w) + b
    return softmax(logits)
def softmax(X):
    """Return the row-wise softmax of a 2-D tensor.

    Args:
        X: Tensor whose dimension 1 holds the scores to normalise.

    Returns:
        A tensor of the same shape as ``X`` in which every row is
        non-negative and sums to 1.
    """
    # Subtract each row's maximum before exponentiating. Softmax is invariant
    # to a per-row shift, and the shift keeps exp() from overflowing to inf
    # (which would otherwise turn the result into NaN for large scores).
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = (X - row_max).exp()
    X_sum = X_exp.sum(dim=1, keepdim=True)
    return X_exp / X_sum
3.定义损失函数
# 3. Loss function: cross-entropy
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: Tensor of predicted probabilities, one row per sample.
        y: Integer tensor of true class indices, one per sample.

    Returns:
        A column tensor (one row per sample) holding the negative log of the
        probability that ``y_hat`` assigns to the true class.
    """
    # Pick, for each row, the probability at the column given by its label.
    true_class_prob = y_hat.gather(dim=1, index=y.view(-1, 1))
    return -torch.log(true_class_prob)
4.定义模型评估函数
# 4. Model evaluation
# 4.1 Accuracy for a batch whose true labels are known
def accuracy(y_hat, y):
    """Return the fraction of rows in ``y_hat`` whose arg-max equals ``y``.

    Args:
        y_hat: Tensor of per-class scores, one row per sample.
        y: Tensor of true class indices.

    Returns:
        The mean of the element-wise matches as a Python float.
    """
    predicted = y_hat.argmax(dim=1)
    matches = (predicted == y).float()
    return matches.mean().item()
# t1 = torch.tensor([[1,2,3,4,5]])
# t2 = torch.tensor([1,2,3,4,6])
# ratio = accuracy(y_hat=t1,y = t2)
# print('测试精度函数:' + str(accuracy(t1,t2)) )
# 4.2 Accuracy of a model over a whole dataset
def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches.
        net: Callable mapping a batch ``X`` to per-class scores, one row per
            sample.

    Returns:
        Number of correctly classified samples divided by the total number
        of samples seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never calls backward(), so disable gradient tracking to avoid
    # building an autograd graph for every batch.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
# Smoke-test evaluate_accuracy on the test loader before any training has run.
# The return value is not stored here.
evaluate_accuracy(test_iter,net)
5.模型训练
# 5. Model training
# Hyperparameters: number of training epochs and the learning rate
num_epochs = 5
lr = 0.1
def train_fashion_mnist(net, train_iter, test_iter, loss, num_epochs, batch_size,
                        params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss/accuracy figures after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class predictions.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        loss: Callable ``loss(y_hat, y)``; its result is summed over the batch.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``d2l.sgd`` when no optimizer is given.
        params: Parameters updated by ``d2l.sgd`` when no optimizer is given.
        lr: Learning rate forwarded to ``d2l.sgd`` when no optimizer is given.
        optimizer: Optional optimizer object; when provided, its
            ``zero_grad()`` / ``step()`` are used instead of ``d2l.sgd``.

    Returns:
        None. Metrics are printed, one line per epoch.
    """
    use_optimizer = optimizer is not None
    for epoch in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        sample_count = 0
        for X, y in train_iter:
            # Forward pass and summed batch loss
            y_hat = net(X)
            batch_loss = loss(y_hat, y).sum()

            # Reset gradients left over from the previous step
            if use_optimizer:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            # Backward pass followed by the parameter update
            batch_loss.backward()
            if use_optimizer:
                optimizer.step()
            else:
                d2l.sgd(params, lr, batch_size)

            # Running totals for this epoch: loss, correct predictions, samples.
            # The predictions counted here were computed before the update.
            loss_total += batch_loss.item()
            correct_total += (y_hat.argmax(dim=1) == y).sum().item()
            sample_count += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d ,loss %.4f , train acc %.3f ,test acc %.3f'
              % (epoch + 1, loss_total / sample_count, correct_total / sample_count, test_acc))
# Run training through the d2l.sgd branch (no optimizer is passed).
# The epoch count comes from num_epochs defined above rather than a repeated
# literal, so changing the hyperparameter actually takes effect.
train_fashion_mnist(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [w, b], lr)
epoch 1 ,loss 0.7869 , train acc 0.748 ,test acc 0.794
epoch 2 ,loss 0.5697 , train acc 0.813 ,test acc 0.805
epoch 3 ,loss 0.5254 , train acc 0.825 ,test acc 0.818
epoch 4 ,loss 0.5018 , train acc 0.831 ,test acc 0.825
epoch 5 ,loss 0.4852 , train acc 0.838 ,test acc 0.827
6.模型的预测结果及可视化
# Take the first batch from the test loader. The built-in next() is the
# Python 3 iterator protocol; the `.next()` method is a Python 2 style alias
# that iterators are not required to provide.
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
# NOTE(review): this rebinds the name `d2l` (previously `d2l.torch`) to
# `d2lzh_pytorch` for everything that runs after this line — confirm that the
# shadowing is intended.
import d2lzh_pytorch as d2l
# Each title shows the true label on the first line, the prediction on the second.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
# Display the first nine images with their titles.
d2l.show_fashion_mnist(X[0:9], titles[0:9])