"""Compare LinearRegression and SGDRegressor on the Boston housing data.

The script loads the dataset, splits it 80/20, standardizes both the
features and the target, fits the two regressors and prints R-squared,
MSE and MAE for each of them.

NOTE: ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2,
so this script needs an older scikit-learn release to run.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly


def _report(name, model, X_eval, y_eval, y_pred, y_scaler):
    """Print the default score, R-squared, MSE and MAE of one fitted model.

    Args:
        name: Short model label inserted into the printed messages.
        model: Fitted estimator exposing ``score(X, y)``.
        X_eval: Standardized evaluation features.
        y_eval: Standardized evaluation targets, shape (n_samples, 1).
        y_pred: Standardized predictions, 1-D or a single column.
        y_scaler: Fitted target scaler used to map values back to the
            original units before computing MSE and MAE.
    """
    print(f'the value of default measurement of {name}:',
          model.score(X_eval, y_eval))
    print(f'the value of R-squared of {name} is', r2_score(y_eval, y_pred))

    # inverse_transform expects a 2-D array; SGDRegressor predicts 1-D, so
    # normalize every prediction to a single column before undoing the scale.
    y_true_orig = y_scaler.inverse_transform(y_eval)
    y_pred_orig = y_scaler.inverse_transform(
        np.asarray(y_pred).reshape(-1, 1))
    print(f'the MSE of {name} is',
          mean_squared_error(y_true_orig, y_pred_orig))
    print(f'the MAE of {name} is',
          mean_absolute_error(y_true_orig, y_pred_orig))


# Load the dataset once instead of rebuilding it for every attribute access.
boston = load_boston()
x = boston.data
y = boston.target
print(boston.DESCR)  # show the dataset description

# Data preparation: hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=33)

# Inspect the spread of the regression target.
print('The max target value is ', np.max(y))
print('The min target value is ', np.min(y))
print('The average target value is ', np.mean(y))

# Standardization: fit the scalers on the training split only, then apply
# the same transform to the test split.
ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

# Linear regression model.
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)

# SGD regressor; it expects a 1-D target, so flatten the scaled column.
sgdr = SGDRegressor()
sgdr.fit(X_train, y_train.ravel())
sgdr_predict = sgdr.predict(X_test)

# Step 5: performance evaluation.
# The goal is to measure the gap between predictions and true values.
# Intuitive metrics are:
#   - mean absolute error (MAE)
#   - mean squared error (MSE)
#   - the R-squared score
# Each model is scored with its own ``score`` method and with r2_score;
# MSE and MAE are computed after mapping values back to the original
# units with the target scaler's inverse_transform.
_report('LR', lr, X_test, y_test, lr_y_predict, ss_y)
_report('SGDR', sgdr, X_test, y_test, sgdr_predict, ss_y)

# Summary:
# - The model's built-in score equals r2_score; the built-in form is the
#   recommended one.
# - SGDRegressor performs slightly worse than LinearRegression here: the
#   former estimates parameters by stochastic gradient descent, the latter
#   solves for them analytically.
# - For very large datasets (100k+ samples) SGDRegressor is recommended.