from collections import OrderedDict
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Toy data set with 32 observations: the feature column '学习时间'
# ("study time") and the 0/1 label column '通过考试' ("passed the exam").
examDict = {
    '学习时间': [
        -3, -2, -1, 0, 0.1, 0.4, 0.5, 0.75,
        1.0, 1.25, 1.5, 1.75, 1.75, 2.0, 2.25, 2.3,
        2.5, 2.75, 3.0, 3.25, 3.4, 3.7, 3.5, 4.0,
        4.25, 4.5, 4.75, 5.0, 5.5, 6.0, 10.0, 11.0,
    ],
    '通过考试': [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 1, 0,
    ],
}

# Wrap the mapping in an OrderedDict and build the DataFrame from it, so the
# column order follows the insertion order above.
examOrdereDict = OrderedDict(examDict)
examDF = pd.DataFrame(examOrdereDict)

# Pull the feature and the label out as separate 1-D Series.
exam_x = examDF['学习时间']
exam_y = examDF['通过考试']
# Train on 80% of the rows and hold out 20% for evaluation.  With only 32
# samples a small, unstratified training split can end up containing a single
# class, which makes LogisticRegression.fit raise ValueError; stratify=exam_y
# keeps both classes represented in the training and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    exam_x, exam_y, test_size=0.2, stratify=exam_y
)
print('训练特征数量:', x_train.shape, '测试特征数量:', x_test.shape, '样本特征数量:', exam_x.shape)

# scikit-learn estimators expect a 2-D feature matrix (n_samples, n_features),
# so turn each 1-D Series into a single-column array.
x_train = x_train.values.reshape(-1, 1)
x_test = x_test.values.reshape(-1, 1)
exam_x = exam_x.values.reshape(-1, 1)

# Fit a logistic regression classifier with the library's default settings.
model = LogisticRegression()
model.fit(x_train, y_train)

# score() returns the mean accuracy on the held-out rows.  Print it: a bare
# expression statement is silently discarded when this runs as a script.
print('测试集准确率:', model.score(x_test, y_test))
# Class probabilities [P(label 0), P(label 1)] for a study time of 3.
print(model.predict_proba([[3]]))

# Rebuild the fitted curve by hand: p = 1 / (1 + exp(-(a + b * x))).
a = model.intercept_
b = model.coef_
za = a + b * exam_x
ypred = 1 / (1 + np.exp(-za))
print(ypred)

# Flatten to 1-D for plotting and sort by x: the samples are not stored in
# ascending order, and plt.plot joins points in the order given, so an unsorted
# sequence would make the curve double back on itself.
x_flat = np.ravel(exam_x)
p_flat = np.ravel(ypred)
order = np.argsort(x_flat)

plt.xlim((-5, 13))
# Scatter the observed 0/1 labels (not the predictions) so the plot shows the
# data together with the fitted curve.
plt.scatter(x_flat, np.ravel(exam_y), color="red", label='exam data', linewidth=0.5)
plt.plot(x_flat[order], p_flat[order], color='blue', label="predict line", linewidth=3)
plt.legend(loc='lower right')
plt.show()
# 结果:
# 对数几率回归模型(预测部分没有写,但比较简单,只需一行代码,在 sklearn 文档中查一下即可)。