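"""Momentum optimizer demo.

Momentum update rule (v has the same shape as params[key] and grads[key]):

    v[key] = momentum * v[key] - lr * grads[key]
    params[key] += v[key]

With momentum = 0 this reduces to plain SGD.

    optimizer = Momentum(0.8, 0)    # equivalent to SGD: the path shows many zigzag oscillations
    optimizer = Momentum(0.8, 0.2)  # with momentum the optimization path improves
"""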
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.gradient import numerical_gradient
import matplotlib.pylab as plt
class simple:
    """Toy objective f(x, y) = sum(x**2 / 20 + y**2) with a numerical gradient.

    The instance stores the objects passed to the constructor without copying
    them, so in-place changes a caller makes to those arrays are visible to
    f() and grad().
    """

    def __init__(self, x, y):
        """Remember the coordinates at which the objective is evaluated."""
        self.x = x
        self.y = y

    @staticmethod
    def _value(x, y):
        """Return sum(x**2 / 20 + y**2) for the given coordinates."""
        return np.sum((x ** 2) / 20 + y ** 2)

    def f(self):
        """Return the objective value at the current (self.x, self.y)."""
        return self._value(self.x, self.y)

    def grad(self):
        """Return {'x': df/dx, 'y': df/dy} estimated with numerical_gradient.

        Each callback evaluates the objective at the array it receives instead
        of ignoring it, so the estimate does not depend on whether
        numerical_gradient perturbs self.x / self.y in place or hands over a
        modified copy.
        NOTE(review): assumes numerical_gradient(func, arr) calls func with an
        array shaped like arr -- confirm against common.gradient.
        """
        grads = {}
        grads['x'] = numerical_gradient(
            lambda x: self._value(x, self.y), self.x)
        grads['y'] = numerical_gradient(
            lambda y: self._value(self.x, y), self.y)
        return grads
class Momentum:
    """Gradient descent with momentum.

    For every parameter key the update is

        v[key] = momentum * v[key] - lr * grads[key]
        params[key] += v[key]

    With momentum=0 this is plain SGD.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        """Store the learning rate and the momentum coefficient."""
        self.lr = lr
        self.momentum = momentum
        # Velocity per parameter key; stays None until the first update().
        self.v = None

    def update(self, params, grads):
        """Apply one momentum step to every entry of params.

        params and grads are dicts sharing the same keys. Each params[key] is
        updated with ``+=``, so NumPy arrays are modified in place and other
        references to the same array see the new values.

        Raises:
            KeyError: if grads has no entry for a key present in params.
        """
        if self.v is None:
            self.v = {}
        for key, value in params.items():
            # Create the velocity lazily so that parameters added after the
            # first call start from zero velocity instead of raising KeyError.
            if key not in self.v:
                self.v[key] = np.zeros_like(value)
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
# Start the descent at (-5, -5). Net keeps references to these same arrays and
# the optimizer updates them with +=, so Net always evaluates the current point.
params = {}
params['x'] = np.array([-5.0])
params['y'] = np.array([-5.0])
Net = simple(params['x'], params['y'])
# optimizer = Momentum(0.8, 0)  # equivalent to plain SGD
optimizer = Momentum(0.8, 0.2)
opt_num = 200
# np.float was removed in NumPy 1.24; the builtin float names the same dtype.
# Every slot is overwritten in the loop below, so the initial contents are
# irrelevant.
result_x = np.empty(opt_num, dtype=float)
result_y = np.empty(opt_num, dtype=float)
for i in range(opt_num):
    grads = Net.grad()
    # Record the position before taking the step. Index [0] pulls the scalar
    # out of the one-element array; assigning the array itself to a scalar
    # slot is deprecated in recent NumPy.
    result_x[i] = params['x'][0]
    result_y[i] = params['y'][0]
    optimizer.update(params, grads)
# Draw the optimization trajectory in the x-y plane.
plt.plot(result_x, result_y)
print(result_x)
print(result_y)
plt.show()