# 说实话，觉得写错了……只是应付一下课堂作业
# (Author's note: "Honestly, I suspect this is written wrong... it was only thrown together for a class assignment.")
import numpy as np
import pandas as pd
import numpy.random
import time
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Combined Cycle Power Plant dataset; columns AT, V, AP, RH, PE are used below.
# NOTE(review): the extension is upper-case '.CSV' — confirm it matches the file on disk
# (matters on case-sensitive filesystems).
data=pd.read_csv('Folds5x2_pp.CSV')
# 就是开始梯度大大大大大大，当时就觉得不太对，但是懒得重新看了，给数据处理下凑合用吧
# (Author's note: "The gradients blew up at the start — it felt wrong, but instead of re-checking
#  the math I just max-normalize the data below as a workaround.")
# Max-normalize every column so the large raw feature scales do not blow up
# the gradient steps taken further below.
peak = {col: data.loc[:, col].max() for col in ("AT", "V", "AP", "RH", "PE")}
data = pd.DataFrame({'a1': data['AT'] / peak["AT"],
                     'a2': data['V'] / peak["V"],
                     'a3': data['AP'] / peak["AP"],
                     'a4': data['RH'] / peak["RH"],
                     'a5': data['PE'] / peak["PE"]})
# Prepend a constant column 'f' = 1 so theta's first entry acts as the intercept.
data.insert(0, 'f', 1)
Udata = data.values
x = Udata[:, 0:5]   # design matrix: bias column f plus features a1..a4
y = Udata[:, 5:6]   # target column a5 (normalized PE), kept 2-D as (n, 1)
theta = np.zeros([1, 5])  # one weight per column of x
def hx(x, theta):
    """Linear hypothesis: return the prediction column x . theta^T, shape (n, 1)."""
    return x @ theta.T
def loss(x, y, theta):
    """Mean squared error of the predictions hx(x, theta) against the targets y."""
    residual = hx(x, theta) - y
    return (residual ** 2).sum() / len(y)
# NOTE(review): this function shadows the pandas module alias `pd` from here on.
# Nothing below this point uses pandas, so behavior is unaffected, but renaming
# it would be safer (kept as-is because DM2 calls it by this name).
def pd(x, y, theta):
    """Partial derivatives of the squared-error objective w.r.t. each parameter.

    Entry j of the result is sum((y - hx(x, theta)) * x[:, j]) — the negative
    gradient of the squared error up to a constant factor, so adding it to
    theta (as DM2 does) decreases the loss.
    """
    residual = (y - hx(x, theta)).ravel()  # flatten to 1-D for elementwise products
    grad = np.zeros(theta.shape)
    for j in range(theta.size):  # one partial derivative per parameter
        grad[0, j] = np.multiply(residual, x[:, j]).sum()
    return grad
def DM2(x, y, theta, YuZhi, learnning_Rate):
    """Single-sample (stochastic) gradient descent over the rows of x.

    Cycles through the samples one at a time, always taking YuZhi + 1 update
    steps in total, and records the full-dataset loss after every step.

    YuZhi          -- iteration threshold (loop stops once the step count exceeds it)
    learnning_Rate -- step size (misspelled name kept: it is part of the interface)

    Returns (theta, step_count, loss_history, elapsed_seconds).
    """
    started = time.time()
    history = [loss(x, y, theta)]  # loss at the initial theta, before any update
    row = 0   # index of the sample used for the next update
    step = 0
    while True:
        g = pd(x[row:row + 1], y[row:row + 1], theta)
        row = (row + 1) % len(x)  # wrap around to the first sample at the end
        theta = theta + learnning_Rate * g
        history.append(loss(x, y, theta))
        step += 1
        if step > YuZhi:
            break
    return theta, step - 1, history, time.time() - started
# Train: 15000 threshold at learning rate 1e-4 (so 15001 single-sample updates).
ENDtheta,times,ENDcosts,spend=DM2(x,y,theta,15000,0.0001)
# 耗时 = elapsed time (seconds)
print('耗时:%f'%(spend))
# Plot the full loss history to check that training actually converged.
fig,ax=plt.subplots(figsize=(12,4))
ax.plot(np.arange(len(ENDcosts)),ENDcosts,'r')
ax.set_xlabel("Iter")
ax.set_ylabel("Loss")
# Observed notebook output — 耗时 (elapsed): 2.377032 s
ENDcosts[-1]  # final training loss (bare expression: displayed by the notebook, a no-op as a script)
# Observed notebook output — final loss: 0.006015465001446885