Debugging the Gradient
The debug approach computes the gradient differently: for each component of theta, take a point a tiny distance to its left and another the same distance to its right, evaluate the cost function at both points, and divide the difference of the two values by the width of the interval to approximate the partial derivative.
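In symbols, this is the standard central-difference approximation, where ε is a small step and e_i is the i-th unit vector:

dJ/dθ_i ≈ ( J(θ + ε·e_i) − J(θ − ε·e_i) ) / (2ε)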
import numpy as np

# Synthetic data: 1000 samples with 10 features, plus a column of ones
# for the intercept, so theta has 11 components.
X = np.random.random(size=(1000, 10))
X_b = np.hstack([np.ones((len(X), 1)), X])
true_theta = np.arange(1, 12, dtype='float')
y = X_b.dot(true_theta) + np.random.normal(size=len(X))

# MSE cost function; returns inf if the computation overflows.
def j(theta, X_b, y):
    try:
        return np.sum((X_b.dot(theta) - y) ** 2) / len(X_b)
    except Exception:
        return float('inf')

# Analytic gradient, derived mathematically from the cost function.
def dj_math(theta, X_b, y):
    return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(X_b)

# Debug gradient: approximate each partial derivative with a central
# difference, perturbing one component of theta at a time.
def dj_debug(theta, X_b, y):
    res = np.empty(len(theta))
    epsilon = 0.01
    for i in range(len(theta)):
        theta1 = theta.copy()
        theta2 = theta.copy()
        theta1[i] += epsilon
        theta2[i] -= epsilon
        res[i] = (j(theta1, X_b, y) - j(theta2, X_b, y)) / (2 * epsilon)
    return res
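Before trusting either gradient inside gradient descent, it helps to check that the two implementations agree at a random point. A small sketch (test_theta is just an illustrative name, not from the original code):

test_theta = np.random.random(size=X_b.shape[1])
# For this quadratic cost the central difference is essentially exact
# (its truncation error depends on the third derivative, which is zero),
# so the two results should match to high precision.
print(np.allclose(dj_math(test_theta, X_b, y),
                  dj_debug(test_theta, X_b, y)))  # expected: True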
# Generic gradient descent: `dj` is passed in as a function, so the same
# loop runs with either dj_math or dj_debug. Here `epsilon` is the
# convergence tolerance, unrelated to the step size used in dj_debug.
def gradient_descent(dj, X_b, y, eta, initial_theta, n_iters=1e4, epsilon=1e-8):
    theta = initial_theta
    i_iter = 1
    while i_iter < n_iters:
        last_theta = theta
        theta = theta - eta * dj(theta, X_b, y)
        # Stop once one step no longer changes the cost meaningfully.
        if abs(j(theta, X_b, y) - j(last_theta, X_b, y)) < epsilon:
            break
        i_iter += 1
    return theta
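A minimal end-to-end run, assuming illustrative hyperparameters (eta = 0.01 and a zero initial_theta are my choices, not fixed by the text):

initial_theta = np.zeros(X_b.shape[1])
eta = 0.01

# Slow but trustworthy: every step evaluates the cost 2 * len(theta) times.
theta_debug = gradient_descent(dj_debug, X_b, y, eta, initial_theta)

# Fast analytic gradient; should land on (almost) the same theta.
theta_math = gradient_descent(dj_math, X_b, y, eta, initial_theta)

print(theta_debug)  # both should be close to true_theta = [1., 2., ..., 11.]
print(theta_math)

dj_debug is far too slow for real training, but once it confirms that dj_math returns the same gradients, the fast analytic version can be used with confidence.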