Gradient Descent Example (Code Notes) (01)

# utils.py: helper functions imported by the training script below
import torch
import numpy as np
import os
import random

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

true_w = [2, -3]
true_b = 4
num_inputs = 2
num_examples = 1000


def get_train_data():
    features = torch.from_numpy(np.array(np.random.normal(0.0, 1.0, [num_examples, num_inputs]), dtype=np.float32))
    labels = features[:, 0] * true_w[0] + features[:, 1] * true_w[1] + true_b
    labels += torch.from_numpy(np.random.normal(0.0, 0.01, labels.shape).astype(np.float32))  # Gaussian noise, cast to float32 to match labels
    return features, labels


def data_iter(batch_size, features, labels):
    _num_examples = len(features)
    indices = list(range(_num_examples))
    random.shuffle(indices)  # read the samples in random order
    for i in range(0, _num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, _num_examples)])  # the last batch may contain fewer than batch_size examples
        yield features.index_select(0, j), labels.index_select(0, j)


def linear_reg(X, w, b):
    return torch.mm(X, w) + b


def squared_loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2


def sgd(params, lr, batch_size):
    # mini-batch SGD step; divide by batch_size because the loss is summed over the batch
    for param in params:
        param.data -= param.grad * lr / batch_size
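A quick sanity check of the batching helper (a hypothetical snippet, not part of the original scripts; it assumes the functions above are saved as `utils.py`):

```python
from utils import get_train_data, data_iter

features, labels = get_train_data()
# Take one mini-batch and inspect its shapes: expect (10, 2) features and (10,) labels
for X, y in data_iter(10, features, labels):
    print(X.shape, y.shape)
    break
```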

 

# Training script: learns w and b from the synthetic data with mini-batch SGD
import torch
import numpy as np
import os
from utils import linear_reg, squared_loss, data_iter, get_train_data, sgd

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

features, labels = get_train_data()

num_inputs = 2
w = torch.tensor([[0], [0]], dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

print(f'w: {w.numpy()}')

w.requires_grad_(True)
b.requires_grad_(True)

batch_size = 10
lr = 0.01
num_epochs = 10
net = linear_reg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # summed squared loss over the mini-batch
        l.backward()
        sgd([w, b], lr, batch_size)
        # clear the gradients before the next mini-batch
        w.grad.data.zero_()
        b.grad.data.zero_()
    # report once per epoch (loss of the last mini-batch)
    print(f'epoch {epoch}, loss: {l.item()}')
print(f'w: {w.clone().detach().numpy()}')
print(f'b: {b.clone().detach().numpy()}')
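For comparison, the same regression can also be trained with PyTorch's built-in modules instead of the handwritten `sgd`. The sketch below is not part of the original scripts; it assumes the helpers above are saved as `utils.py` and reuses the same hyperparameters:

```python
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
from utils import get_train_data

features, labels = get_train_data()
loader = DataLoader(TensorDataset(features, labels), batch_size=10, shuffle=True)

net = nn.Linear(2, 1)                  # same linear model, y = Xw + b
loss_fn = nn.MSELoss()                 # mean squared error (the handwritten loss is summed, then scaled in sgd)
optimizer = optim.SGD(net.parameters(), lr=0.01)

for epoch in range(10):
    for X, y in loader:
        optimizer.zero_grad()
        l = loss_fn(net(X).view(-1), y)  # flatten (batch, 1) -> (batch,) to match labels
        l.backward()
        optimizer.step()
    print(f'epoch {epoch}, loss: {l.item()}')

print('w:', net.weight.data.numpy())
print('b:', net.bias.data.numpy())
```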

 

 

Steepest descent, also called batch gradient descent, is an optimization algorithm used to find a local minimum of a function. The basic idea is that, at each iteration, we move from the current estimate in the direction of the negative gradient, gradually approaching the minimum. Below is a simple steepest-descent example using Python and NumPy:

```python
import numpy as np

def gradient_descent(f, gradient_f, x0, learning_rate=0.01, tolerance=1e-6, max_iterations=1000):
    """
    Minimize the function f using steepest (gradient) descent.

    Args:
        f (callable): objective function
        gradient_f (callable): gradient of f
        x0 (numpy array): initial guess
        learning_rate (float): learning rate, default 0.01
        tolerance (float): stopping criterion, default 1e-6
        max_iterations (int): maximum number of iterations, default 1000

    Returns:
        x_opt (numpy array): the solution found
        loss_history (list): loss value after each iteration
    """
    x_opt = x0.copy()
    loss_history = [f(x_opt)]  # seed with the initial loss so the first comparison is valid
    for _ in range(max_iterations):
        grad = gradient_f(x_opt)
        x_new = x_opt - learning_rate * grad
        loss_new = f(x_new)
        # stop once the change in the loss falls below the tolerance
        if abs(loss_new - loss_history[-1]) < tolerance:
            break
        # otherwise accept the step and record the loss
        x_opt = x_new
        loss_history.append(loss_new)
    return x_opt, loss_history

# Example: a simple quadratic function f(x) = x^2
def quadratic_function(x):
    return x**2

def derivative_quadratic_function(x):
    return 2 * x

# Start from a random initial point
x0 = np.random.randn(1)
x_opt, loss_history = gradient_descent(quadratic_function, derivative_quadratic_function, x0)
print("Optimal solution:", x_opt)
```

In this example, `gradient_f` is a function that computes the derivative of the objective f; it returns a vector, the gradient of f at the given point. As the iterations proceed, once the change in the loss becomes small enough (below the given tolerance), the algorithm considers a local minimum found and stops.
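The update rule the code implements can be written compactly as a formula; the numbers below are a hypothetical worked step (not from the original post), assuming a starting point x_0 = 1 and a learning rate of 0.1 on f(x) = x^2:

```latex
x_{k+1} = x_k - \eta \, \nabla f(x_k)
% For f(x) = x^2 we have \nabla f(x) = 2x, so with \eta = 0.1 and x_0 = 1:
x_1 = 1 - 0.1 \cdot 2 \cdot 1 = 0.8, \qquad x_2 = 0.8 - 0.1 \cdot 2 \cdot 0.8 = 0.64
```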