Analyze the following code:

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import numpy as np
import matplotlib.pyplot as plt

'''Compare the performance of four optimizers on a regression task.'''
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Hyperparameters
LR = 0.01         # learning rate
BATCH_SIZE = 32   # batch size
EPOCH = 12        # number of training epochs
N_SAMPLES = 1000  # number of samples
# Generate a synthetic dataset
def generate_data(n_samples):
    x = torch.linspace(-1, 1, n_samples).unsqueeze(1)
    # y = x^2 + Gaussian noise (std = 0.1)
    y = x.pow(2) + torch.normal(mean=torch.zeros(n_samples, 1), std=0.1)
    return x, y
# Create a data loader
def create_dataloader(x, y, batch_size):
    dataset = Data.TensorDataset(x, y)
    loader = Data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0  # avoid issues in non-Jupyter environments
    )
    return loader
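
# Note: with N_SAMPLES = 1000 and BATCH_SIZE = 32, each epoch yields
# ceil(1000 / 32) = 32 mini-batches (31 full batches plus a final batch of 8),
# so the training loop below records EPOCH * 32 = 384 loss values per optimizer.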
# Define a simple neural network model
class RegressionNet(nn.Module):
    def __init__(self):
        super(RegressionNet, self).__init__()
        self.hidden = nn.Linear(1, 20)   # input layer -> hidden layer
        self.predict = nn.Linear(20, 1)  # hidden layer -> output layer

    def forward(self, x):
        x = torch.relu(self.hidden(x))   # ReLU activation
        return self.predict(x)           # linear output
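
# Note: this is a deliberately small 1 -> 20 -> 1 MLP, i.e.
# (1*20 + 20) + (20*1 + 1) = 61 trainable parameters; the point of the script
# is to compare optimizers, not to maximize model capacity.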
# Training function
def train_optimizers(epochs, loader, optimizers, nets, loss_func):
    losses_history = [[] for _ in range(len(optimizers))]  # per-optimizer loss record
    for epoch in range(epochs):
        print(f'Epoch [{epoch + 1}/{epochs}]')
        for step, (batch_x, batch_y) in enumerate(loader):
            for i, (net, optimizer) in enumerate(zip(nets, optimizers)):
                # Forward pass
                prediction = net(batch_x)
                loss = loss_func(prediction, batch_y)
                # Backward pass and parameter update
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Record the loss
                losses_history[i].append(loss.item())
    return losses_history
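
# Note: the inner loop reuses the same (batch_x, batch_y) for all four networks,
# so every optimizer sees an identical, identically ordered data stream; the
# loss curves therefore differ only because of the update rules.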
# Main function
def main():
    # Generate and visualize the data
    x, y = generate_data(N_SAMPLES)
    plt.figure(figsize=(8, 5))
    plt.scatter(x.numpy(), y.numpy(), s=5, alpha=0.7)
    plt.title(r'Regression Dataset: $y = x^2 + \epsilon$')  # raw string so \e is not an escape
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.show()
    # Create the data loader
    loader = create_dataloader(x, y, BATCH_SIZE)

    # Create one model instance per optimizer
    net_sgd = RegressionNet()
    net_momentum = RegressionNet()
    net_rmsprop = RegressionNet()
    net_adam = RegressionNet()

    # Create the optimizers
    optimizer_sgd = optim.SGD(net_sgd.parameters(), lr=LR)
    optimizer_momentum = optim.SGD(net_momentum.parameters(), lr=LR, momentum=0.8)
    optimizer_rmsprop = optim.RMSprop(net_rmsprop.parameters(), lr=LR, alpha=0.9)
    optimizer_adam = optim.Adam(net_adam.parameters(), lr=LR, betas=(0.9, 0.99))
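
    # Schematically, the four update rules being compared are (per PyTorch's
    # implementations, ignoring dampening / weight-decay options):
    #   SGD:       theta <- theta - lr * g
    #   Momentum:  v <- mu * v + g;             theta <- theta - lr * v              (mu = 0.8)
    #   RMSprop:   s <- a * s + (1 - a) * g^2;  theta <- theta - lr * g / (sqrt(s) + eps)  (a = 0.9)
    #   Adam:      bias-corrected momentum plus RMSprop-style scaling
    #              (beta1 = 0.9, beta2 = 0.99 here; PyTorch's default beta2 is 0.999)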
    nets = [net_sgd, net_momentum, net_rmsprop, net_adam]
    optimizers = [optimizer_sgd, optimizer_momentum, optimizer_rmsprop, optimizer_adam]
    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']

    # Loss function
    loss_func = nn.MSELoss()

    # Train the models
    losses_history = train_optimizers(EPOCH, loader, optimizers, nets, loss_func)
    # Plot the loss curves
    plt.figure(figsize=(12, 8))
    for i, losses in enumerate(losses_history):
        # Smooth each curve with a 50-step moving average
        # ('valid' mode shortens the series by window - 1 = 49 points)
        smooth_losses = np.convolve(losses, np.ones(50) / 50, mode='valid')
        plt.plot(smooth_losses, label=labels[i], alpha=0.8)
    plt.title('Optimizer Performance Comparison')
    plt.xlabel('Training Steps')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.grid(True)
    plt.ylim(0, 0.2)  # focus on the low-loss range
    plt.tight_layout()
    plt.show()
    # Visualize the predictions
    plt.figure(figsize=(12, 8))
    test_x = torch.linspace(-1.5, 1.5, 200).unsqueeze(1)
    for i, net in enumerate(nets):
        with torch.no_grad():
            prediction = net(test_x)
        plt.plot(test_x.numpy(), prediction.numpy(),
                 label=f'{labels[i]} Prediction', linewidth=2.5)
    plt.scatter(x.numpy(), y.numpy(), s=10, c='gray', alpha=0.4, label='True Data')
    plt.title('Model Predictions vs True Data')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.grid(True)
    plt.xlim(-1.5, 1.5)
    plt.ylim(-0.1, 1.5)
    plt.tight_layout()
    plt.show()
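
    # Note: test_x spans [-1.5, 1.5] while the training data lies in [-1, 1],
    # so the outer parts of each curve are extrapolation; because a ReLU MLP is
    # piecewise linear, the predictions typically continue as straight lines
    # beyond |x| = 1 rather than following the parabola.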
if __name__ == '__main__':
    main()