# Fit a linear regression model to this data.
from sklearn.linear_model import LinearRegression
import numpy as np
# Fit accuracy as a linear function of distance.
lr = LinearRegression()
# fit() needs a 2-D feature matrix. Indexing a pandas Series directly with
# [:, np.newaxis] is rejected by current pandas releases, so take the
# underlying NumPy array first and add the column axis to that.
# NOTE(review): assumes pga is a pandas DataFrame; pga[['distance']] is an
# equivalent 2-D input.
lr.fit(pga['distance'].values[:, np.newaxis], pga['accuracy'])
# Fitted intercept and slope(s) of the regression line.
theta0 = lr.intercept_
theta1 = lr.coef_
print(theta0)
print(theta1)
# Cost function evaluated for a given (theta0, theta1) pair:
# the accumulated squared error, averaged over the samples and halved.
def cost(x, y, theta0, theta1):
    """Return the halved mean squared error of the line ``theta1 * x + theta0``.

    Computes ``sum((theta1 * x_i + theta0 - y_i) ** 2) / (2 * n)`` where
    ``n = len(x)``.

    Args:
        x: Sequence of inputs (list, NumPy array, pandas Series, ...).
        y: Sequence of observed targets, same length as ``x``.
        theta0: Intercept of the candidate line.
        theta1: Slope of the candidate line.

    Returns:
        The cost as a float.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    # Convert to plain arrays so values are paired by position. Indexing a
    # pandas Series with x[i] looks up the *label* i, which fails (or pairs
    # the wrong rows) whenever the index is not exactly 0..n-1.
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    residuals = xs * theta1 + theta0 - ys
    # float() keeps the division in plain Python, so an empty input still
    # raises ZeroDivisionError instead of silently producing NaN.
    return float((residuals ** 2).sum()) / (2 * len(xs))
# Hold the intercept fixed and sweep the slope over an evenly spaced grid,
# recording the cost at each slope so the cost curve can be plotted.
theta0 = 100
theta1s = np.linspace(-3, 2, 197)
costs = []
for theta1 in theta1s:
    current_cost = cost(pga.distance, pga.accuracy, theta0, theta1)
    costs.append(current_cost)

# Cost as a function of the slope theta1.
plt.plot(theta1s, costs)
plt.show()

print(pga.distance)
# Adjusting theta.
def partial_cost_theta0(x, y, theta0, theta1):
    """Return the partial derivative of the cost with respect to theta0.

    The model is the linear fit ``y = theta1 * x + theta0`` (not a sigmoid,
    which could be used for a non-linear model). The result is the mean of
    the residuals ``(theta1 * x + theta0) - y``.

    Args:
        x: Array-like of inputs supporting element-wise arithmetic
            (e.g. a NumPy array or pandas Series).
        y: Array-like of observed targets, aligned with ``x``.
        theta0: Intercept of the line.
        theta1: Slope of the line.

    Returns:
        The sum of the residuals divided by their count.
    """
    # Work on the whole series at once instead of element by element,
    # then sum and average at the end.
    predicted = theta1 * x + theta0
    residuals = predicted - y
    sample_count = len(residuals)
    return residuals.sum() / sample_count
# Partial derivative w.r.t. theta0, evaluated at theta0 = 1, theta1 = 1.
partial0 = partial_cost_theta0(
    pga.distance,
    pga.accuracy,
    theta0=1,
    theta1=1,
)
def partial_cost_theta1(x,y,theta0