李沐【动手学深度学习V2】-线性回归+基础优化算法（2023.06.20）

本文链接：https://blog.youkuaiyun.com/HZZC_/article/details/131300527

该代码示例展示了如何在PyTorch中从零开始实现线性回归模型，包括数据生成、小批量处理、模型定义、损失函数（均方损失）以及优化算法（小批量随机梯度下降）。通过模拟数据来训练模型并监控训练过程中的损失变化。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

一、简单模型理解

在这里插入图片描述

注意：上图中损失对 w 求导的结果少了一个负号；另外，最优解 w* 表达式里的 “Xy” 一项中，X 应为 X 的转置，即 $w^* = (X^\top X)^{-1} X^\top y$。

在这里插入图片描述

二、基础优化方法

在这里插入图片描述

三、线性回归从0开始实现

（1）
在这里插入图片描述

import pandas as pd
import random
import torch
from d2l import torch as d2l
import matplotlib.pyplot as plt

def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every example (broadcast over the outputs).
        num_examples: Number of examples (rows) to sample.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` with
        entries drawn from a normal distribution (mean 0, std 1). ``y`` holds
        the noisy targets reshaped into a column with one row per element.
    """
    num_features = len(w)
    # One row per example, one column per feature.
    X = torch.normal(0, 1, (num_examples, num_features))
    # Noise-free targets; the scalar bias is broadcast across all examples.
    clean = torch.matmul(X, w) + b
    # Gaussian noise (mean 0, std 0.01) with the same shape as the targets.
    noise = torch.normal(0, 0.01, clean.shape)
    # reshape(-1, 1) infers the row count from the number of elements.
    return X, (clean + noise).reshape((-1, 1))
# Ground-truth parameters used to synthesize the dataset.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# House-price analogy: each label is the "true" sale price and the two feature
# columns are the factors used to predict it.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

# Scatter the second feature column against the labels with marker size 1.
d2l.set_figsize()
# detach() first: a tensor that is part of an autograd graph has to be
# detached before it can be converted to a NumPy array.
second_feature = features[:, 1].detach().numpy()
targets = labels.detach().numpy()
d2l.plt.scatter(second_feature, targets, 1)
plt.show()

在这里插入图片描述
（2）定义 data_iter 函数：它接受批量大小、特征矩阵和标签向量作为输入，先将样本下标随机打乱，然后每次生成一个大小为 batch_size 的小批量（样本数不能被 batch_size 整除时，最后一个小批量会更小）。

def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    Args:
        batch_size: Number of examples per minibatch.
        features: Indexable collection of inputs (the factors X).
        labels: Targets aligned with ``features`` along the first axis.

    Yields:
        ``(features[idx], labels[idx])`` for successive groups of shuffled
        indices. For example, 1000 examples with a batch size of 100 give 10
        batches, one per iteration. The last batch is shorter when the number
        of examples is not a multiple of ``batch_size``.
    """
    num_examples = len(features)
    order = list(range(num_examples))  # indices 0 .. num_examples - 1
    random.shuffle(order)              # visit the examples in random order
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is
        # needed to handle the final (possibly shorter) batch.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
batch_size = 10
# Print only the first minibatch as a sanity check, then stop iterating.
for X, y in data_iter(batch_size, features, labels):
    print(X, "\n", y)
    break

（3）定义初始化模型参数和定义模型
在这里插入图片描述

# Initial model parameters: small random weights (mean 0, std 0.01) in a
# (2, 1) column and a zero bias. Both are flagged for gradient tracking so
# that backward() populates their .grad attributes.
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_()
b = torch.zeros(1).requires_grad_()
def linreg(X, w, b):
    """Linear regression model: return ``Xw + b``.

    Args:
        X: Input features.
        w: Weights, multiplied with ``X`` via ``torch.matmul``.
        b: Bias, broadcast-added to the product.
    """
    weighted = torch.matmul(X, w)
    return weighted + b

（4）定义损失函数和优化算法
在这里插入图片描述

def squared_loss(y_hat, y):
    """Squared loss, element-wise: ``(y_hat - y)**2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so a flat
    label vector and a column of predictions can be compared directly. The
    result is not reduced; callers sum or average it as needed.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient. The training loss is
            summed over the minibatch, so dividing turns it into an average.

    Each parameter is updated in place and its gradient is reset to zero so
    the next backward pass starts from a clean accumulator.
    """
    with torch.no_grad():  # the update itself must not be recorded by autograd
        for p in params:
            # p.grad is the derivative of the loss with respect to p.
            step = lr * p.grad / batch_size
            p.sub_(step)
            p.grad.zero_()

（5）训练过程

# Training hyperparameters.
lr = 0.03
num_epochs = 3
# Aliases so the model and loss can be swapped without touching the loop.
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # element-wise loss for this minibatch
        # Reduce to a scalar before backpropagating; sgd() then divides the
        # summed gradient by batch_size.
        torch.sum(l).backward()
        sgd([w, b], lr, batch_size)
    # After each epoch, report the mean loss over the whole dataset without
    # recording the evaluation in the autograd graph.
    with torch.no_grad():
        train_1 = loss(net(features, w, b), labels)
        print(f"epoch{epoch+1},loss{float(train_1.mean()):f}")

在这里插入图片描述

所有代码

import pandas as pd
import random
import torch
from d2l import torch as d2l
import matplotlib.pyplot as plt
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    In the house-price analogy used by this script, ``y`` plays the role of
    the true sale price.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every example (broadcast over the outputs).
        num_examples: Number of examples (rows) to sample.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` with
        entries drawn from a normal distribution (mean 0, std 1). ``y`` holds
        the noisy targets reshaped into a column with one row per element.
    """
    num_features = len(w)
    # One row per example, one column per feature.
    X = torch.normal(0, 1, (num_examples, num_features))
    # Noise-free targets; the scalar bias is broadcast across all examples.
    clean = torch.matmul(X, w) + b
    # Gaussian noise (mean 0, std 0.01) with the same shape as the targets.
    noise = torch.normal(0, 0.01, clean.shape)
    # reshape(-1, 1) infers the row count from the number of elements.
    return X, (clean + noise).reshape((-1, 1))
# Ground-truth parameters used to synthesize the dataset.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# House-price analogy: each label is the "true" sale price and the two feature
# columns are the factors used to predict it.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    Args:
        batch_size: Number of examples per minibatch.
        features: Indexable collection of inputs (the factors X).
        labels: Targets aligned with ``features`` along the first axis.

    Yields:
        ``(features[idx], labels[idx])`` for successive groups of shuffled
        indices. For example, 1000 examples with a batch size of 100 give 10
        batches, one per iteration. The last batch is shorter when the number
        of examples is not a multiple of ``batch_size``.
    """
    num_examples = len(features)
    order = list(range(num_examples))  # indices 0 .. num_examples - 1
    random.shuffle(order)              # visit the examples in random order
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is
        # needed to handle the final (possibly shorter) batch.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
batch_size = 10
# Print only the first minibatch as a sanity check, then stop iterating.
for X, y in data_iter(batch_size, features, labels):
    print(X, "\n", y)
    break
# Initial model parameters: small random weights (mean 0, std 0.01) in a
# (2, 1) column and a zero bias. Both are flagged for gradient tracking so
# that backward() populates their .grad attributes.
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_()
b = torch.zeros(1).requires_grad_()
def linreg(X, w, b):
    """Linear regression model: return ``Xw + b``.

    Args:
        X: Input features.
        w: Weights, multiplied with ``X`` via ``torch.matmul``.
        b: Bias, broadcast-added to the product.
    """
    weighted = torch.matmul(X, w)
    return weighted + b
def squared_loss(y_hat, y):
    """Squared loss, element-wise: ``(y_hat - y)**2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so a flat
    label vector and a column of predictions can be compared directly. The
    result is not reduced; callers sum or average it as needed.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient. The training loss is
            summed over the minibatch, so dividing turns it into an average.

    Each parameter is updated in place and its gradient is reset to zero so
    the next backward pass starts from a clean accumulator.
    """
    with torch.no_grad():  # the update itself must not be recorded by autograd
        for p in params:
            # p.grad is the derivative of the loss with respect to p.
            step = lr * p.grad / batch_size
            p.sub_(step)
            p.grad.zero_()
# Training hyperparameters.
lr = 0.03
num_epochs = 3
# Aliases so the model and loss can be swapped without touching the loop.
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # element-wise loss for this minibatch
        # Reduce to a scalar before backpropagating; sgd() then divides the
        # summed gradient by batch_size.
        torch.sum(l).backward()
        sgd([w, b], lr, batch_size)
    # After each epoch, report the mean loss over the whole dataset without
    # recording the evaluation in the autograd graph.
    with torch.no_grad():
        train_1 = loss(net(features, w, b), labels)
        print(f"epoch{epoch+1},loss{float(train_1.mean()):f}")