pytorch线性回归【从零实现+简洁实现】-优快云博客

本文链接：https://blog.youkuaiyun.com/weixin_43721000/article/details/125771227

该博客介绍了如何使用PyTorch从零开始构建和训练线性回归模型，包括构造数据集、定义模型、损失函数和优化算法。首先，通过模拟数据并手动实现模型、损失和优化过程，然后利用PyTorch内置模块进行简洁实现，展示了PyTorch的便捷性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

文章目录

一、前言
二、实现
- 1.从零实现
- 2.简洁实现【使用pytorch提供的方法】

一、前言

从零实现

构造数据集生成迭代器
定义回归模型
定义损失函数【均方误差】
定义优化算法【SGD】
训练

简洁实现

构造数据集生成迭代器
使用 pytorch 的模块实现回归模型
使用 pytorch 的损失函数【均方误差】
使用 pytorch 的优化算法【SGD】
训练

ps：感谢李沐老师，虽然他不认识我（旺柴）

二、实现

1.从零实现

import random
import torch
from d2l import torch as d2l


# Synthetic dataset --------------------------------------------
def synthetic_data(w, b, num_examples):
    """
    Generate a noisy linear dataset following y = Xw + b + noise.

    :param w: weight tensor; ``len(w)`` sets the number of feature columns
    :param b: bias added to every label
    :param num_examples: number of rows (samples) to draw
    :return: ``(x, y)`` where ``x`` has shape ``(num_examples, len(w))`` and
             ``y`` holds the labels reshaped into a single column
    """
    num_features = len(w)

    # Features: mean 0, standard deviation 1; one row per sample, one column per feature
    x = torch.normal(0, 1, (num_examples, num_features))

    # Noise-free labels produced by the true linear function
    clean = torch.matmul(x, w) + b

    # Noise with mean 0 and standard deviation 0.01
    # (the second argument of torch.normal is the std, not the variance)
    noise = torch.normal(0, 0.01, clean.shape)

    # Return the labels as a column vector
    y = (clean + noise).reshape((-1, 1))
    return x, y

# Build the dataset from known ground-truth parameters
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

# Sample output from one run (the data is random, so values differ each time)
print(features)
# tensor([[-0.6286,  1.0225],
#         [ 1.0482, -0.6137],
#         [-0.3066,  0.6919],
#         ...,
#         [ 0.3020, -0.2817],
#         [ 1.3083,  1.4064],
#         [ 0.1264,  0.9362]])
print(labels)
# tensor([[-5.2968e-01],
#         [ 8.3766e+00],
#         [ 1.2278e+00],
#         ...,
#         [ 5.7573e+00],
#         [ 2.0380e+00],
#         [ 1.2725e+00]])

# # Optional: scatter-plot the second feature against the labels ---------------------
# d2l.set_figsize()
# d2l.plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)
# d2l.plt.show()


# Mini-batch iterator ----------------------------------
def data_iter(batch_size, features, labels):
    """
    Yield ``(features, labels)`` mini-batches in a freshly shuffled order.

    :param batch_size: number of samples per batch; the final batch is
                       shorter when the dataset size is not a multiple of it
    :param features: indexable tensor of samples (its length sets the dataset size)
    :param labels: tensor of labels, indexed with the same positions as ``features``
    :return: generator of ``(feature_batch, label_batch)`` pairs
    """
    total = len(features)

    # Random visiting order over every sample index
    order = list(range(total))
    random.shuffle(order)

    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the last batch simply
        # contains whatever samples remain.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

batch_size = 10

# Peek at a single mini-batch (sample output from one run shown below)
x, y = next(data_iter(batch_size, features, labels))
print("{}\n{}".format(x, y))
# tensor([[ 1.8326,  0.7707],
#         [ 0.2930,  1.7733],
#         [-1.6511,  1.1953],
#         [ 0.3137, -0.1215],
#         [ 0.2067,  0.4356],
#         [ 0.2309,  1.3944],
#         [-1.7035,  1.0994],
#         [ 0.2823,  0.0139],
#         [-1.1442,  1.4188],
#         [ 0.1316,  1.3700]])
# tensor([[ 5.2469],
#         [-1.2503],
#         [-3.1862],
#         [ 5.2271],
#         [ 3.1401],
#         [-0.0835],
#         [-2.9385],
#         [ 4.7317],
#         [-2.9028],
#         [-0.1943]])


# Regression model -----------------------
def lin_reg(X, w, b):
    """
    Linear regression model: return the affine map ``X @ w + b``.

    :param X: input features
    :param w: weights
    :param b: bias, added to the matrix product by broadcasting
    :return: model predictions
    """
    projection = X @ w
    return projection + b


# Initialise the learnable parameters w, b --------------------------------------
# (these are model parameters, not hyper-parameters)
# Weights: 2 rows x 1 column, drawn with mean 0 and standard deviation 0.01, tracked by autograd
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_(True)
# Bias: starts at zero, tracked by autograd
b = torch.zeros(1).requires_grad_(True)

# Sample output from one run (w is random, so its values differ each time)
print(w)
# tensor([[ 0.0084],
#         [-0.0166]], requires_grad=True)
print(b)
# tensor([0.], requires_grad=True)


# Loss function ------------------------------------------
def square_loss(y_hat, y, batch_size):
    """
    Element-wise halved squared error, pre-divided by ``batch_size``.

    :param y_hat: predictions
    :param y: targets; reshaped to the shape of ``y_hat`` before subtracting
    :param batch_size: divisor applied to every element, so that summing the
                       result over a batch of that size yields the batch mean
    :return: tensor with the same shape as ``y_hat``
    """
    residual = y_hat - y.reshape(y_hat.shape)
    # The factor 1/2 cancels the 2 produced by differentiating the square.
    return residual.pow(2) / 2 / batch_size


# Optimisation algorithm: SGD -------------------------------------
def sgd(params, lr):
    """
    Apply one mini-batch stochastic gradient descent step in place.

    :param params: iterable of tensors whose ``.grad`` has been populated
    :param lr: learning rate
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            p.sub_(lr * p.grad)     # p <- p - lr * dLoss/dp
            p.grad.zero_()          # reset so the next backward() starts from zero


# Training loop ----------
lr = 0.03
num_epochs = 3
net = lin_reg
loss = square_loss

for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        # Forward pass. Divide by the real batch length so that a shorter
        # final batch is still averaged correctly.
        l = loss(net(x, w, b), y, len(x))
        l.sum().backward()                      # sum the per-sample terms, then back-propagate
        sgd([w, b], lr)                         # update the parameters

    # After each epoch, report the mean loss over the whole dataset
    with torch.no_grad():
        # The loss function already divides by its third argument, so passing
        # the dataset size and summing gives the true mean. (Dividing by
        # batch_size and then calling .mean() would under-report the loss by
        # a factor of batch_size.)
        train_l = loss(net(features, w, b), labels, len(features)).sum()
        print("epoch: {}, loss: {}".format(epoch+1, train_l))
        # Approximate sample output from one run (values vary between runs):
        # epoch: 1, loss: 0.0368
        # epoch: 2, loss: 0.000129
        # epoch: 3, loss: 4.92e-05


# Compare the learned parameters with the ground truth ---------------
print("w预测误差：{}".format(true_w - w.reshape(true_w.shape)))
print("b预测误差：{}".format(true_b - b))
# Sample output from one run:
# w预测误差：tensor([5.1260e-06, 3.6597e-04], grad_fn=<SubBackward0>)
# b预测误差：tensor([0.0010], grad_fn=<RsubBackward1>)

2.简洁实现【使用pytorch提供的方法】

import random
import torch
from d2l import torch as d2l


# Synthetic dataset --------------------------------------------
def synthetic_data(w, b, num_examples):
    """
    Generate a noisy linear dataset following y = Xw + b + noise.

    :param w: weight tensor; ``len(w)`` sets the number of feature columns
    :param b: bias added to every label
    :param num_examples: number of rows (samples) to draw
    :return: ``(x, y)`` where ``x`` has shape ``(num_examples, len(w))`` and
             ``y`` holds the labels reshaped into a single column
    """
    num_features = len(w)

    # Features: mean 0, standard deviation 1; one row per sample, one column per feature
    x = torch.normal(0, 1, (num_examples, num_features))

    # Noise-free labels produced by the true linear function
    clean = torch.matmul(x, w) + b

    # Noise with mean 0 and standard deviation 0.01
    # (the second argument of torch.normal is the std, not the variance)
    noise = torch.normal(0, 0.01, clean.shape)

    # Return the labels as a column vector
    y = (clean + noise).reshape((-1, 1))
    return x, y

# Build the dataset from known ground-truth parameters
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

# Sample output from one run (the data is random, so values differ each time)
print(features)
# tensor([[-0.6286,  1.0225],
#         [ 1.0482, -0.6137],
#         [-0.3066,  0.6919],
#         ...,
#         [ 0.3020, -0.2817],
#         [ 1.3083,  1.4064],
#         [ 0.1264,  0.9362]])
print(labels)
# tensor([[-5.2968e-01],
#         [ 8.3766e+00],
#         [ 1.2278e+00],
#         ...,
#         [ 5.7573e+00],
#         [ 2.0380e+00],
#         [ 1.2725e+00]])

# # Optional: scatter-plot the second feature against the labels ---------------------
# d2l.set_figsize()
# d2l.plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)
# d2l.plt.show()


# Mini-batch iterator ----------------------------------
from torch.utils import data

batch_size = 10
data_iter = data.DataLoader(
    data.TensorDataset(features, labels),       # pairs each feature row with its label
    batch_size=batch_size,                      # samples per batch
    shuffle=True,                               # shuffle the data
)

# Peek at one batch. next(iter(...)) uses a throwaway iterator; every later
# `for ... in data_iter` obtains its own iterator from the DataLoader, so this
# peek does not remove a batch from training.
print(next(iter(data_iter)))
# Sample output from one run:
# [
#     tensor([[-0.1174,  0.9516],
#             [-0.6164,  0.3813],
#             [-0.9736,  1.9661],
#             [-1.2993,  0.5336],
#             [-1.0567,  1.6242],
#             [ 0.5484,  0.3681],
#             [ 1.3446,  0.2446],
#             [-1.4124, -0.5372],
#             [-1.3326, -0.3083],
#             [ 1.8234, -0.0221]]),
#     tensor([[ 0.7119],
#             [ 1.6712],
#             [-4.4386],
#             [-0.2142],
#             [-3.4445],
#             [ 4.0427],
#             [ 6.0445],
#             [ 3.2062],
#             [ 2.5751],
#             [ 7.9381]])
# ]


# Regression model -----------------------
from torch import nn

# A single Linear (fully connected) layer: 2 input features -> 1 output
net = nn.Sequential(nn.Linear(2, 1))


# Initialise the learnable parameters w, b -------------------
# net[0] is the first (and only) layer inside the Sequential container.
# nn.init works on the parameters directly, which avoids mutating them
# through the `.data` attribute.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)  # w: mean 0, standard deviation 0.01
nn.init.zeros_(net[0].bias)                         # b: starts at zero


# Loss function ------------------------------------------
# Mean squared error (per the PyTorch docs, the default reduction is 'mean')
loss = nn.MSELoss()


# Optimisation algorithm: SGD -------------------------------------
# Optimises every trainable parameter of net (w and b) with learning rate 0.03
trainer = torch.optim.SGD(net.parameters(), lr=0.03)


# Training loop -----------------------
num_epochs = 3
for epoch in range(num_epochs):
    for x, y in data_iter:
        trainer.zero_grad()     # clear gradients left over from the previous step
        l = loss(net(x), y)     # forward pass and loss
        l.backward()            # back-propagate; the loss is already a scalar, so no manual sum is needed
        trainer.step()          # update the parameters

    # After each epoch, report the loss over the whole dataset.
    # This is evaluation only, so skip building an autograd graph.
    with torch.no_grad():
        l = loss(net(features), labels)
    print("epoch: {}, loss: {}".format(epoch+1, l))
    # Sample output from one run (values vary between runs):
    # epoch: 1, loss: 0.00029666078626178205
    # epoch: 2, loss: 0.00010394241689937189
    # epoch: 3, loss: 0.00010382977779954672