# Evaluation metrics for regression algorithms, estimated with k-fold cross-validation
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
# Load the housing data set: whitespace-separated values, no header row,
# so the column labels are supplied explicitly.
filename = 'housing.csv'
# NOTE(review): 'PRTATIO' looks like a typo for 'PTRATIO'. It is kept as-is
# because the labels are not used below; confirm before relying on it.
names = ['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS',
'RAD','TAX','PRTATIO','B','LSTAT','MEDV']
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which is
# deprecated in recent pandas releases.
data = read_csv(filename, names=names, sep=r'\s+')
array = data.values
X = array[:, 0:13]  # first 13 columns are the input features
Y = array[:, 13]    # 14th column is the regression target
n_splits = 10
seed = 7
# random_state only has an effect when shuffle=True; recent scikit-learn
# versions raise ValueError if random_state is set while shuffle is False.
# Shuffling with a fixed seed keeps the folds reproducible.
kflod = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LinearRegression()
# Mean absolute error (MAE)
# scoring = 'neg_mean_absolute_error'
# result = cross_val_score(model,X,Y,cv=kflod,scoring=scoring)
# print("MAE:%.3f (%.3f)" %(result.mean(),result.std()))
# Mean squared error (MSE)
# scoring = 'neg_mean_squared_error'
# result = cross_val_score(model,X,Y,cv=kflod,scoring=scoring)
# print("MSE:%.3f (%.3f)" %(result.mean(),result.std()))
# Coefficient of determination (R^2): the proportion of the total variation
# in the dependent variable that is explained by the independent variables
# through the regression relationship.
scoring = 'r2'
result = cross_val_score(model, X, Y, cv=kflod, scoring=scoring)
# Report the mean and standard deviation of the score across the folds.
print("R2:%.3f (%.3f)" % (result.mean(), result.std()))