from numpy import *
import matplotlib.pyplot as plt
# 1. 导入数据 (Load the data)
# Read the comma-separated samples; columns 0 and 1 of each row are used below.
points = genfromtxt('linear_regress_lsm_data.csv', delimiter=',')
length = len(points)
print('point count %d' % length)
# Column 0 becomes x and column 1 becomes y.
x = array(points[:, 0])
y = array(points[:, 1])
plt.scatter(x, y)
# show has to be called: the bare attribute `plt.show` only evaluates to the
# function object and never renders the figure.
plt.show()
point count 100
<function matplotlib.pyplot.show(close=None, block=None)>

# 2. 损失函数 (Loss function)
def compute_cost(points, w, b):
    """Return the mean squared error of the line ``y = w * x + b`` over *points*.

    Args:
        points: 2-D array-like with one sample per row; column 0 is read as
            x and column 1 as y.
        w: Slope of the line.
        b: Intercept of the line.

    Returns:
        The sum of ``(y - w * x - b) ** 2`` over all rows divided by the
        number of rows.

    Raises:
        ValueError: If *points* has no rows (the mean would be undefined).
    """
    data = asarray(points, dtype=float)
    length = len(data)
    if length == 0:
        raise ValueError("points must contain at least one row")
    residuals = data[:, 1] - w * data[:, 0] - b
    # Sum the squared residuals of every row. The loop this replaces assigned
    # to the total instead of adding to it, so only the last row was counted.
    return (residuals ** 2).sum() / float(length)
# 3. 拆分训练集和测试集 (Split into training and test sets)
from sklearn.model_selection import train_test_split
# Reshape x and y into single-column 2-D arrays before splitting.
new_x, new_y = (column.reshape(-1, 1) for column in (x, y))
# test_size=0.2 holds out a fifth of the rows for testing. No random_state is
# passed, so the rows chosen can differ from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    new_x,
    new_y,
    test_size=0.2,
)
# 4. 训练 (Training)
from sklearn.linear_model import LinearRegression
# Fit a linear regression model on the training split.
lr_mode = LinearRegression()
lr_mode.fit(x_train, y_train)
# Print the fitted intercept (b) and coefficient (w). The model is bound to
# `lr_mode`; the name `lr` is never defined here and raised NameError.
print("b: ", lr_mode.intercept_)
print("w: ", lr_mode.coef_)
b: [7.99102098]
w: [[1.32243102]]
# 5. 预测 (Prediction)
# Run the fitted model on the held-out inputs.
y_predict = lr_mode.predict(x_test)

# Report the shape of the prediction array, then preview at most its first
# ten entries.
print(
    "predict shape:",
    y_predict.shape,
)
print(
    "predict value: ",
    y_predict[:10],
)
predict shape: (20, 1)
predict value: [[ 77.26304819]
[ 60.95061612]
[ 63.76912956]
[ 61.38694704]
[ 77.7424197 ]
[ 87.923399 ]
[ 73.92056411]
[ 83.6204053 ]
[ 75.28594104]
[100.67536067]]
# 6. 画拟合曲线 (Plot the fitted line)
from sklearn.metrics import mean_squared_error
# Score the predictions against the held-out targets.
mse = mean_squared_error(y_test, y_predict)
print("均方误差: ", mse)
# Draw the test points, the predictions over the test inputs as a line, and
# the training points on the same figure.
plt.scatter(x_test, y_test, color='hotpink')
plt.plot(x_test, y_predict, c="r")
plt.scatter(x_train, y_train, color='#88c999')
# show has to be called: the bare attribute `plt.show` only evaluates to the
# function object and never renders the figure.
plt.show()
均方误差: 104.5838657978367
<function matplotlib.pyplot.show(close=None, block=None)>
