1. P4 PyTorch basic operations
- x = torch.empty(5, 3)
Creates a 5x3 tensor (matrix); the tensor is PyTorch's core data format. Note that empty returns uninitialized memory, so the values are whatever happened to be in that block of memory.
import torch
x = torch.empty(5, 3)
x
Output:
tensor([[-4.7960e+35, 3.0794e-41, 0.0000e+00],
[ 0.0000e+00, -4.8290e+35, 3.0794e-41],
[-4.8290e+35, 3.0794e-41, -4.8290e+35],
[ 3.0794e-41, 0.0000e+00, 0.0000e+00],
[-2.7661e+13, 4.5565e-41, -2.6833e+13]])
- x = torch.rand(5, 3)
A 5x3 tensor of random values
#Random values; the tensor is PyTorch's core data format, and all computation operates on tensors
x = torch.rand(5, 3)
x
Output:
tensor([[0.5898, 0.6413, 0.3710],
[0.8552, 0.1343, 0.6362],
[0.0649, 0.6644, 0.0517],
[0.4938, 0.4127, 0.6358],
[0.1633, 0.9749, 0.7178]])
- x = torch.zeros(5, 3, dtype=torch.long)
A 5x3 matrix of zeros (here with integer dtype torch.long)
x = torch.zeros(5, 3, dtype=torch.long)
x
Output:
tensor([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]])
- x = torch.tensor([5.5, 3])
Builds a tensor directly from the given data
- x = torch.randn_like(x, dtype=torch.float)
Builds a new tensor with the same size as x, filled with random values
x = torch.tensor([5.5, 3])
x
Output:
tensor([5.5000, 3.0000])
# new_ones builds a tensor of ones; randn_like then builds a tensor of the same size as x
x = x.new_ones(5, 3, dtype=torch.double)
x = torch.randn_like(x, dtype=torch.float)
x
Output:
tensor([[ 0.5455, 0.3819, -0.4844],
[-2.3630, 1.4743, 0.2780],
[ 0.5868, 0.3403, -0.2346],
[ 1.5565, 1.4095, -0.7017],
[ 0.2720, 1.6439, -1.3772]])
- x.size()
Returns the tensor's dimensions, analogous to ndarray.shape in NumPy
x.size()
Output:
torch.Size([5, 3])
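x.shape is an alias for x.size() and returns the same torch.Size object:
x.shape
Output:
torch.Size([5, 3])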
- torch.add(x, y) or x + y
#Both spellings of addition are equivalent
y = torch.rand(5, 3)
x + y
Output:
tensor([[ 0.8101, 0.9515, -0.2045],
[-1.3861, 1.8700, 0.4592],
[ 1.0789, 0.3978, 0.6649],
[ 1.6045, 2.2121, -0.3190],
[ 0.7040, 1.8255, -1.2072]])
torch.add(x, y)  # same addition as x + y
Output:
tensor([[ 0.8101, 0.9515, -0.2045],
[-1.3861, 1.8700, 0.4592],
[ 1.0789, 0.3978, 0.6649],
[ 1.6045, 2.2121, -0.3190],
[ 0.7040, 1.8255, -1.2072]])
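PyTorch also has in-place variants: methods ending in an underscore modify their tensor in place. A minimal sketch reusing the x and y above:
y.add_(x)  # in-place: y now holds x + y, no new tensor is allocated
result = torch.empty(5, 3)
torch.add(x, y, out=result)  # write the sum into a preallocated tensor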
- x[:, 1]
Indexing works much like NumPy/Python slicing; this selects the second column
x[:, 1]
tensor([0.3819, 1.4743, 0.3403, 1.4095, 1.6439])
- y = x.view(a, b)
Reshapes the tensor into the given dimensions (see the note on memory layout after the example below)
x = torch.randn(4, 4)
#view(16) flattens to a 1-D tensor of 16 elements, i.e. shape (16,), not (1, 16)
y = x.view(16)
#-1 means this dimension is inferred automatically from the others
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())
Output:
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
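One caveat worth noting: view returns a view over the same underlying storage (so it requires contiguous memory), while reshape copies when necessary. A small sketch using the x and y above:
y[0] = 100.0                     # y shares storage with x, so x[0, 0] also becomes 100
# x.t().view(16) would raise an error because the transpose is not contiguous:
z = x.t().contiguous().view(16)  # make a contiguous copy first
z = x.t().reshape(16)            # or let reshape handle the copy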
- a = torch.ones(5)
b = a.numpy()  # torch → NumPy
a = np.ones(5)
b = torch.from_numpy(a)  # NumPy → torch
#a is a torch tensor; calling a.numpy() converts it to the NumPy format
a = torch.ones(5)
b = a.numpy()
b
Output:
array([1., 1., 1., 1., 1.], dtype=float32)
#NumPy → torch
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
b
Output:
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
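On the CPU, both conversions share memory with the original, so an in-place change on one side shows up on the other:
a = torch.ones(5)
b = a.numpy()
a.add_(1)  # in-place add on the tensor
b          # now array([2., 2., 2., 2., 2.], dtype=float32): same underlying buffer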
2. P5 Autograd: the automatic differentiation mechanism
- Tensors that need gradients can be flagged manually with requires_grad=True; running backpropagation then fills in their .grad
#Method 1
x = torch.randn(3, 4, requires_grad=True)
x
Output:
tensor([[ 2.8683, 0.6248, 0.0186, 0.0735],
[ 0.0996, -2.1149, -0.0215, 0.2622],
[ 0.7681, 0.9182, -1.5293, 0.9561]], requires_grad=True)
#Method 2
x = torch.randn(3, 4)
x.requires_grad = True
x
Output:
tensor([[-0.6583, -0.0477, -0.4181, -0.5117],
[-0.4690, -1.0255, -1.4632, 0.9888],
[ 0.0579, -0.0639, 0.8040, 0.6931]], requires_grad=True)
b = torch.randn(3,4,requires_grad=True)
t = x + b
y = t.sum()
y
Output:
tensor(-1.6873, grad_fn=<SumBackward0>)
#Run backpropagation; gradients are populated for the tensors with requires_grad=True
y.backward()
b.grad
Output:
tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
- Even though t was never explicitly given requires_grad=True, it gets it automatically because it is computed from tensors that require gradients:
x.requires_grad, b.requires_grad, t.requires_grad
>>>(True, True, True)
#Computation flow: first initialize with random values
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b
x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad
>>>(False, True, True, True)
- x.is_leaf
#is_leaf marks leaf nodes of the computation graph: tensors created directly by the user rather than produced by an operation
x.is_leaf, b.is_leaf, w.is_leaf, y.is_leaf, z.is_leaf
>>>(True, True, True, False, False)
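By default .grad is only populated for leaf tensors. detach() produces a new leaf that shares data with the original but is cut off from the graph; a small sketch:
y_detached = y.detach()
y_detached.is_leaf, y_detached.requires_grad
>>>(True, False)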
- z.backward(retain_graph=True)
Runs the backward pass; retain_graph=True keeps the graph so backward can be called again
z.backward(retain_graph=True)
#If gradients are not cleared between backward passes, they accumulate
w.grad
>>>tensor([1.7860])
b.grad
>>>tensor([2.])
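Since gradients accumulate, each additional backward pass adds another 1 to b.grad here; .grad.zero_() (or optimizer.zero_grad() in a training loop) clears it in place:
z.backward(retain_graph=True)  # another backward pass: b.grad grows by 1 again
b.grad.zero_()                 # reset the accumulated gradient in place
b.grad
>>>tensor([0.])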
3. P6-P7 Linear regression
- Build a set of inputs x and their corresponding labels y
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)
x_train.shape
#(11,1)
y_values = [2*i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)
y_train.shape
#(11, 1)
import torch
import torch.nn as nn
- The linear regression model (linear regression is really just a fully-connected layer with no activation function)
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        # nn.Linear defines a fully-connected layer
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)
model
#LinearRegressionModel(
#  (linear): Linear(in_features=1, out_features=1, bias=True)
#)
- Specify the parameters and the loss function
#torch.optim is the package implementing the various optimization algorithms used to train neural networks; SGD is one of them
#class torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)[source]
#Implements stochastic gradient descent (optionally with momentum). params (iterable) – parameters to optimize, or dicts defining parameter groups; lr (float) – learning rate; momentum (float, optional) – momentum factor (default: 0)
#weight_decay (float, optional) – weight decay, i.e. L2 penalty (default: 0); dampening (float, optional) – dampening for momentum (default: 0); nesterov (bool, optional) – enables Nesterov momentum (default: False)
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
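For reference, the optional SGD arguments documented above would be passed like this (the momentum and weight_decay values here are illustrative, not what is used below):
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
#                            momentum=0.9, weight_decay=1e-4)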
- Train the model
for epoch in range(epochs):
    epoch += 1
    # Note: convert the NumPy arrays to tensors
    inputs = torch.from_numpy(x_train)
    labels = torch.from_numpy(y_train)
    # Gradients must be cleared at every iteration
    optimizer.zero_grad()
    # Forward pass
    outputs = model(inputs)
    # Compute the loss between y-hat and the labels
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the weight parameters
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
- Test the model's predictions
predicted = model(torch.from_numpy(x_train).requires_grad_()).data.numpy()
predicted
#.data.numpy() converts the result to an ndarray
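Since the labels were generated from y = 2x + 1, the learned parameters should end up close to 2 and 1; a quick check (using the linear attribute defined in the model above):
model.linear.weight.item(), model.linear.bias.item()
# after training, these should be close to 2.0 and 1.0 respectively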
- Saving and loading the model
torch.save(model.state_dict(), 'model.pkl')
model.load_state_dict(torch.load('model.pkl'))
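state_dict() stores only the parameter tensors, so the model must be constructed before load_state_dict. Saving the whole model object is also possible (a sketch; 'model_full.pkl' is an arbitrary file name), though it ties the file to the class definition:
torch.save(model, 'model_full.pkl')    # pickles the entire model object
model = torch.load('model_full.pkl')   # the class definition must be importable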
- Training on the GPU (just move the data and the model onto CUDA)
import torch
import torch.nn as nn
import numpy as np
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
epochs = 1000
for epoch in range(epochs):
    epoch += 1
    inputs = torch.from_numpy(x_train).to(device)
    labels = torch.from_numpy(y_train).to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
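Tensors on the GPU cannot be converted to NumPy directly; move them back to the CPU first. A minimal prediction sketch following this training loop:
with torch.no_grad():  # no gradient tracking needed for inference
    predicted = model(torch.from_numpy(x_train).to(device)).cpu().numpy()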
4. P8-P9 Common forms of tensors, and the hub module
0: scalar
1: vector
2: matrix
3: n-dimensional tensor
- Scalar
Usually just a single number
- Vector
e.g. [-5., 2., 0.]; in deep learning this usually means features, such as a word-vector feature or the features along some dimension
v = [v1, v2, ..., vn]
- Matrix
Computation is generally done on matrices, which are usually multi-dimensional
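These forms correspond to the tensor's number of dimensions, which can be checked with .dim():
s = torch.tensor(3.14)           # 0-d: scalar
v = torch.tensor([-5., 2., 0.])  # 1-d: vector
m = torch.ones(2, 3)             # 2-d: matrix
t = torch.ones(2, 3, 4)          # 3-d tensor
s.dim(), v.dim(), m.dim(), t.dim()
>>>(0, 1, 2, 3)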
The hub module
Loads and runs other people's pretrained neural network models
GitHub: https://github.com/pytorch/hub
Models: https://pytorch.org/hub/research-models
#Download the architecture and pretrained weights
import torch
model = torch.hub.load('pytorch/vision:v0.4.2', 'deeplabv3_resnet101', pretrained=True)
model.eval()
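A minimal inference sketch once the model is loaded (the file name 'dog.jpg' is an assumption; the normalization constants are the standard ImageNet statistics used by torchvision models):
from PIL import Image
from torchvision import transforms
input_image = Image.open('dog.jpg').convert('RGB')  # assumed local image file
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
input_batch = preprocess(input_image).unsqueeze(0)  # add a batch dimension
with torch.no_grad():
    output = model(input_batch)['out'][0]  # DeepLabV3 returns a dict with key 'out'
predictions = output.argmax(0)             # per-pixel class indices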