Preface
This post summarizes Chapter 7 ("Optimization Algorithms") of Dive into Deep Learning (动手学深度学习), with simple PyTorch implementations of the common optimizers SGD, momentum, AdaGrad, RMSProp, AdaDelta, and Adam, as well as the learning-rate schedules warmup and cosine annealing.
The dataset used is the airfoil self-noise dataset: https://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise
References:
https://zh.d2l.ai/chapter_optimization/minibatch-sgd.html
https://blog.youkuaiyun.com/weixin_35848967/article/details/108493217
Formulas for each algorithm
SGD
At step $t$, minibatch SGD samples a minibatch $\mathcal{B}_t$, averages the gradient over it, and takes a step of size $\eta$ (the learning rate).
Gradient computation:
$$g_t = \frac{1}{|\mathcal{B}_t|} \sum_{i \in \mathcal{B}_t} \nabla f_i(\boldsymbol{w}_{t-1})$$
Parameter update:
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - \eta \, g_t$$
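A minimal sketch of this update rule (the names `params` and `lr` are my own, assuming `params` is a list of parameter tensors whose `.grad` fields have been populated by `backward()`):

```python
import torch

def sgd_step(params, lr):
    """Plain minibatch SGD: w <- w - lr * grad (a sketch, not the post's final code)."""
    with torch.no_grad():
        for p in params:
            p -= lr * p.grad   # descend along the minibatch gradient
            p.grad.zero_()     # clear the gradient for the next step
```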
Momentum
Momentum keeps an exponentially weighted velocity $\boldsymbol{v}$ of past gradients and updates the parameters with it:
$$\boldsymbol{v}_t = \gamma \boldsymbol{v}_{t-1} + \eta \, g_t$$
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - \boldsymbol{v}_t$$
where $\gamma \in [0, 1)$ is the momentum coefficient.
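A sketch of the momentum update, assuming `vs` holds one velocity tensor per parameter (initialized to zeros; all names here are placeholders):

```python
import torch

def momentum_step(params, vs, lr, gamma=0.9):
    """Momentum SGD sketch: v = gamma * v + lr * g, then w = w - v."""
    with torch.no_grad():
        for p, v in zip(params, vs):
            v.mul_(gamma).add_(lr * p.grad)  # update the velocity
            p -= v                           # step along the velocity
            p.grad.zero_()
```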
AdaGrad
AdaGrad accumulates the squared gradients in a state $\boldsymbol{s}$ and scales the learning rate element-wise, so frequently updated coordinates get smaller steps.
State update of $\boldsymbol{s}$:
$$\boldsymbol{s}_t = \boldsymbol{s}_{t-1} + g_t \odot g_t$$
Parameter update:
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - \frac{\eta}{\sqrt{\boldsymbol{s}_t + \epsilon}} \odot g_t$$
where $\odot$ denotes element-wise multiplication and $\epsilon$ (e.g. $10^{-6}$) prevents division by zero.
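A sketch of both AdaGrad steps, assuming `ss` holds one accumulated-square tensor per parameter (zero-initialized; names are placeholders):

```python
import torch

def adagrad_step(params, ss, lr, eps=1e-6):
    """AdaGrad sketch: accumulate g*g, then scale each element's step."""
    with torch.no_grad():
        for p, s in zip(params, ss):
            s += p.grad ** 2                        # s = s + g (*) g
            p -= lr / torch.sqrt(s + eps) * p.grad  # per-element scaled step
            p.grad.zero_()
```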
RMSProp
RMSProp replaces AdaGrad's monotone accumulation with an exponential moving average, so the effective learning rate no longer shrinks irreversibly:
$$\boldsymbol{s}_t = \gamma \boldsymbol{s}_{t-1} + (1 - \gamma) \, g_t \odot g_t$$
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - \frac{\eta}{\sqrt{\boldsymbol{s}_t + \epsilon}} \odot g_t$$
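A sketch of the RMSProp update, with `ss` holding the moving average of squared gradients per parameter (placeholder names):

```python
import torch

def rmsprop_step(params, ss, lr, gamma=0.9, eps=1e-6):
    """RMSProp sketch: EMA of g*g instead of AdaGrad's running sum."""
    with torch.no_grad():
        for p, s in zip(params, ss):
            s.mul_(gamma).add_((1 - gamma) * p.grad ** 2)  # EMA of squared gradient
            p -= lr / torch.sqrt(s + eps) * p.grad
            p.grad.zero_()
```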
AdaDelta
AdaDelta has no explicit learning rate; in addition to the squared-gradient average $\boldsymbol{s}$ it keeps a second average $\Delta\boldsymbol{x}$ of the squared updates, which takes the place of $\eta$:
$$\boldsymbol{s}_t = \rho \boldsymbol{s}_{t-1} + (1 - \rho) \, g_t \odot g_t$$
$$g_t' = \sqrt{\frac{\Delta\boldsymbol{x}_{t-1} + \epsilon}{\boldsymbol{s}_t + \epsilon}} \odot g_t$$
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - g_t'$$
$$\Delta\boldsymbol{x}_t = \rho \Delta\boldsymbol{x}_{t-1} + (1 - \rho) \, g_t' \odot g_t'$$
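A sketch of AdaDelta, with `ss` and `deltas` holding the two per-parameter EMAs (both zero-initialized; names are placeholders):

```python
import torch

def adadelta_step(params, ss, deltas, rho=0.9, eps=1e-5):
    """AdaDelta sketch: the EMA of past squared updates replaces the learning rate."""
    with torch.no_grad():
        for p, s, delta in zip(params, ss, deltas):
            s.mul_(rho).add_((1 - rho) * p.grad ** 2)           # EMA of g*g
            g_prime = torch.sqrt((delta + eps) / (s + eps)) * p.grad
            p -= g_prime                                        # rescaled step
            delta.mul_(rho).add_((1 - rho) * g_prime ** 2)      # EMA of update^2
            p.grad.zero_()
```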
Adam
Adam combines momentum and RMSProp: it keeps EMAs of both the gradient and its square, with bias correction to compensate for the zero initialization:
$$\boldsymbol{v}_t = \beta_1 \boldsymbol{v}_{t-1} + (1 - \beta_1) \, g_t, \qquad \boldsymbol{s}_t = \beta_2 \boldsymbol{s}_{t-1} + (1 - \beta_2) \, g_t \odot g_t$$
$$\hat{\boldsymbol{v}}_t = \frac{\boldsymbol{v}_t}{1 - \beta_1^t}, \qquad \hat{\boldsymbol{s}}_t = \frac{\boldsymbol{s}_t}{1 - \beta_2^t}$$
$$\boldsymbol{w}_t = \boldsymbol{w}_{t-1} - \frac{\eta \, \hat{\boldsymbol{v}}_t}{\sqrt{\hat{\boldsymbol{s}}_t} + \epsilon}$$
Common defaults are $\beta_1 = 0.9$, $\beta_2 = 0.999$, $\epsilon = 10^{-8}$.
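A sketch of one Adam step, where `vs`/`ss` are the first/second moment EMAs and `t` is the 1-based step counter used for bias correction (placeholder names):

```python
import torch

def adam_step(params, vs, ss, t, lr, beta1=0.9, beta2=0.999, eps=1e-8):
    """Adam sketch: momentum-style and RMSProp-style EMAs plus bias correction."""
    with torch.no_grad():
        for p, v, s in zip(params, vs, ss):
            v.mul_(beta1).add_((1 - beta1) * p.grad)       # first-moment EMA
            s.mul_(beta2).add_((1 - beta2) * p.grad ** 2)  # second-moment EMA
            v_hat = v / (1 - beta1 ** t)                   # bias-corrected moments
            s_hat = s / (1 - beta2 ** t)
            p -= lr * v_hat / (torch.sqrt(s_hat) + eps)
            p.grad.zero_()
```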
Cosine annealing
Cosine annealing decays the learning rate from $\eta_{\max}$ to $\eta_{\min}$ along a half cosine over $T_{\max}$ steps:
$$\eta_t = \eta_{\min} + \frac{1}{2}\left(\eta_{\max} - \eta_{\min}\right)\left(1 + \cos\left(\frac{t}{T_{\max}}\pi\right)\right)$$
Warmup, mentioned in the preface, typically ramps the learning rate linearly from a small value up to $\eta_{\max}$ over the first few epochs before the decay schedule takes over.
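As a sketch of how to apply this schedule with PyTorch's built-in `CosineAnnealingLR` (the model and hyperparameter values below are placeholders, not the post's training setup):

```python
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

model = nn.Linear(5, 1)
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-4)

for epoch in range(100):
    # ... train for one epoch, calling optimizer.step() per batch ...
    scheduler.step()  # anneal the learning rate once per epoch
```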
Code
```python
import time
import numpy as np
import math
from matplotlib import pyplot as plt
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler


class LinearModel(nn.Module):
    """A single linear layer mapping `ci` input features to `nc` outputs."""
    def __init__(self, ci, nc):
        super(LinearModel, self).__init__()
        self.fc1 = nn.Linear(ci, nc)

    def forward(self, x):
        x = self.fc1(x)
        return x

    def init_params(self):
        # Xavier-initialize the weights and zero the biases
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                m.bias.data.zero_()


class AirFoilDatasets(data.Dataset):
    """Airfoil self-noise dataset: tab-separated text, last column is the label."""
    def __init__(self, path):
        super(AirFoilDatasets, self).__init__()
        self.data = np.genfromtxt(path, delimiter='\t')
        # Standardize each column, then keep the first 1500 rows
        self.data = (self.data - self.data.mean(axis=0)) / self.data.std(axis=0)
        self.data = self.data[:1500, :]

    def __getitem__(self, index):
        features = self.data[index, :-1]
        labels = self.data[index, -1]
        return torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32)
```