# -*- coding:utf-8 -*-
import time
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
# Suppress every warning category for the remainder of the script.
warnings.simplefilter('ignore')
# Font used for the Chinese plot labels; list installed candidates with: fc-list :lang=zh
myfont = FontProperties(
    fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',
)
# Keep the minus sign '-' from being rendered as an empty box on the axes.
rcParams.update({'axes.unicode_minus': False})
# 1. Load the data.
# NOTE(review): the path below is an empty placeholder; point it at the real
# ';'-separated CSV file before running the script.
df = pd.read_csv(filepath_or_buffer="", sep=';')
# 2. Clean the data and split it into training and test sets.
# '?' cells are treated as missing values; any row containing one is dropped.
new_df = df.replace('?', np.nan)
data = new_df.dropna(axis=0, how='any')
# A DataFrame cannot be sliced positionally as data[:, a:b]; .iloc is required.
# The builtin float replaces the np.float alias, which was removed in NumPy 1.24.
X = data.iloc[:, 0:2].astype(float)  # features: the first two columns
Y = data.iloc[:, 4].astype(float)  # target: the fifth column
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
# 3. Define the model: standardize -> degree-2 polynomial expansion -> linear regression.
# PolynomialFeatures adds a constant bias column by default, so the linear
# model is fitted without an intercept term of its own.
algo = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('lr', LinearRegression(fit_intercept=False)),
])
# 4. Train.
algo.fit(x_train, y_train)
# Look the fitted steps up by name rather than going through get_params().
poly_step = algo.named_steps['poly']
lr_step = algo.named_steps['lr']
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# get_feature_names_out() and fall back only on releases that predate it.
if hasattr(poly_step, 'get_feature_names_out'):
    print(poly_step.get_feature_names_out().tolist())
else:
    print(poly_step.get_feature_names())
print(lr_step.coef_)
print(lr_step.intercept_)
# 5. Evaluate the model on the held-out data.
# Predictions and targets are both reshaped into column vectors so that the
# subtraction below is element-wise.
y_predict = algo.predict(x_test)
y_predict = y_predict.reshape((-1, 1))
y_test = np.array(y_test).reshape((-1, 1))
# Mean squared error on the test set (computed here but not printed).
j_theta = np.mean(np.square(y_predict - y_test))
# Value returned by score() for the test split, then for the training split.
print(algo.score(x_test, y_test))
print(algo.score(x_train, y_train))
# 6. Plot the true values (red) and the predictions (green) against the
# index of each test sample.
t = np.arange(len(x_test))
fig = plt.figure(facecolor='w')
ax1 = fig.add_subplot(1, 1, 1)
ax1.plot(t, y_test, 'r-', linewidth=2)
ax1.plot(t, y_predict, 'g-', linewidth=2)
ax1.set_xlabel(u'x轴标签', fontproperties=myfont)
ax1.set_ylabel(u'y轴标签', fontproperties=myfont)
ax1.set_title(u"线性回归预测的关系", fontsize=20, fontproperties=myfont)
# Legend entries follow the plotting order above: true values, then predictions.
ax1.legend([u'真实值', u'预测值'], loc='lower right', prop=myfont)
# The `b` keyword of grid() was renamed to `visible` in Matplotlib 3.5 and
# later removed; passing the flag positionally works on old and new releases.
ax1.grid(True)
plt.show()
# Machine learning: linear regression with sklearn
# (Web-page residue: "latest recommended article published on 2024-07-14 14:24:15".)