记录7个模型(逻辑回归、SVM、决策树、随机森林、GBDT、XGBoost和LightGBM)关于accuracy、precision、recall、F1-score和AUC值的评分表格,并画出ROC曲线。
(时间太紧了直接上代码了嘤嘤)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline
# Load the full dataset from disk.
df = pd.read_csv('data_all.csv')
# Quick look at the first rows (only rendered when this is the final
# expression of a notebook cell; otherwise the result is discarded).
df.head()

# 'status' is the target; every remaining column is used as a feature.
y = df['status']
x = df.drop('status', axis='columns')

# Hold out 30% of the rows for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=2018,
)
# Build the seven classifiers. Every estimator keeps its library defaults;
# only the SVC gets probability=True, because predict_proba is called on it
# later in this script.
# NOTE(review): the features are fed in unscaled and no random_state is
# passed to any estimator, so scores are presumably not reproducible
# run-to-run - confirm whether that is intended.
lr = LogisticRegression()
svm = SVC(probability=True)
dt = DecisionTreeClassifier()
gbdt = GradientBoostingClassifier()
rfc = RandomForestClassifier()
gbm = LGBMClassifier()
xgb = XGBClassifier()

# Fit each classifier on the training split, in creation order.
for model in (lr, svm, dt, gbdt, rfc, gbm, xgb):
    model.fit(x_train, y_train)
def get_scores(y_true, y_predicet, y_predict_pro):
    """Print accuracy, precision, recall, F1 and ROC AUC for one classifier.

    Args:
        y_true: Ground-truth labels.
        y_predicet: Predicted class labels; compared with ``y_true`` for
            accuracy, precision, recall and F1.
        y_predict_pro: Scores handed to ``metrics.roc_auc_score`` (the
            callers in this script pass column 1 of ``predict_proba``).

    Returns:
        None. The five values are only written to stdout.
    """
    # Every metric is evaluated before the first print, so an exception in
    # any of them leaves no partial report behind.
    report = [
        ('准确率:', metrics.accuracy_score(y_true, y_predicet)),
        ('精确率:', metrics.precision_score(y_true, y_predicet)),
        ('召回率:', metrics.recall_score(y_true, y_predicet)),
        ('F1-score:', metrics.f1_score(y_true, y_predicet)),
        ('AUC', metrics.roc_auc_score(y_true, y_predict_pro)),
    ]
    for label, value in report:
        print(label, value)
def _predict_on_test(model):
    """Return (class labels, column-1 probabilities) of ``model`` on ``x_test``."""
    labels = model.predict(x_test)
    positive_proba = model.predict_proba(x_test)[:, 1]
    return labels, positive_proba


# For each fitted classifier: print a header line, predict on the test
# split, then print its metrics. The *_predictPro arrays are reused below
# for the ROC curves.
print('LogisticRegression: ')
lr_predict, lr_predictPro = _predict_on_test(lr)
get_scores(y_test, lr_predict, lr_predictPro)

print('svm: ')
svm_predict, svm_predictPro = _predict_on_test(svm)
get_scores(y_test, svm_predict, svm_predictPro)

print('DecisionTreeClassifier: ')
dt_predict, dt_predictPro = _predict_on_test(dt)
get_scores(y_test, dt_predict, dt_predictPro)

print('GradientBoostingClassifier: ')
gbdt_predict, gbdt_predictPro = _predict_on_test(gbdt)
get_scores(y_test, gbdt_predict, gbdt_predictPro)

print('RandomForestClassifier: ')
rfc_predict, rfc_predictPro = _predict_on_test(rfc)
get_scores(y_test, rfc_predict, rfc_predictPro)

print('lgb: ')
gbm_predict, gbm_predictPro = _predict_on_test(gbm)
get_scores(y_test, gbm_predict, gbm_predictPro)

print('xgb: ')
xgb_predict, xgb_predictPro = _predict_on_test(xgb)
get_scores(y_test, xgb_predict, xgb_predictPro)
# ROC points (false-positive rate, true-positive rate, thresholds) for each
# model, computed from the test-set probabilities gathered above.
fpr_lr, tpr_lr, thresholds_lr = metrics.roc_curve(y_test, lr_predictPro)
fpr_svm, tpr_svm, thresholds_svm = metrics.roc_curve(y_test, svm_predictPro)
fpr_rfc, tpr_rfc, thresholds_rfc = metrics.roc_curve(y_test, rfc_predictPro)
fpr_dt, tpr_dt, thresholds_dt = metrics.roc_curve(y_test, dt_predictPro)
fpr_gbdt, tpr_gbdt, thresholds_gbdt = metrics.roc_curve(y_test, gbdt_predictPro)
fpr_xgb, tpr_xgb, thresholds_xgb = metrics.roc_curve(y_test, xgb_predictPro)
fpr_gbm, tpr_gbm, thresholds_gbm = metrics.roc_curve(y_test, gbm_predictPro)

# One line per model on a single square figure. The tuple order fixes both
# the draw order and the order of the legend entries.
roc_series = (
    (fpr_lr, tpr_lr, 'black', 'LogisticRegression'),
    (fpr_svm, tpr_svm, 'red', 'svm'),
    (fpr_rfc, tpr_rfc, 'green', 'RandomForestClassifier'),
    (fpr_dt, tpr_dt, 'blue', 'DecisionTreeClassifier'),
    (fpr_gbdt, tpr_gbdt, 'yellow', 'GradientBoostingClassifier'),
    (fpr_xgb, tpr_xgb, 'brown', 'xgb'),
    (fpr_gbm, tpr_gbm, 'purple', 'lgb'),
)
plt.figure(figsize=(6, 6))
for fpr, tpr, line_color, legend_name in roc_series:
    plt.plot(fpr, tpr, color=line_color, label=legend_name)

plt.title('ROC curve')
plt.legend(loc='lower right')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
结果如下:
LogisticRegression:
准确率: 0.7484232655921513
精确率: 0.0
召回率: 0.0
F1-score: 0.0
AUC 0.5674548527432631
svm:
准确率: 0.7484232655921513
精确率: 0.0
召回率: 0.0
F1-score: 0.0
AUC 0.5
DecisionTreeClassifier:
准确率: 0.6720392431674842
精确率: 0.3640897755610973
召回率: 0.40668523676880225
F1-score: 0.3842105263157895
AUC 0.5839605959124909
GradientBoostingClassifier:
准确率: 0.7792571829011913
精确率: 0.6057692307692307
召回率: 0.35097493036211697
F1-score: 0.4444444444444445
AUC 0.7632651038569477
RandomForestClassifier:
准确率: 0.7666433076384023
精确率: 0.5783132530120482
召回率: 0.26740947075208915
F1-score: 0.3657142857142857
AUC 0.7160000208652834
lgb:
准确率: 0.7701471618780659
精确率: 0.5701357466063348
召回率: 0.35097493036211697
F1-score: 0.43448275862068964
AUC 0.7574019592501017
xgb:
准确率: 0.7855641205325858
精确率: 0.6305418719211823
召回率: 0.3565459610027855
F1-score: 0.4555160142348754
AUC 0.7713634419371329