Task3 模型调优

使用网格搜索法对7个模型进行调优(调参时采用五折交叉验证的方式),并进行模型评估

import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import warnings
warnings.filterwarnings(action ='ignore', category = DeprecationWarning)

# Load the prepared feature table; the 'status' column is used as the target.
df = pd.read_csv('data_all.csv')
df.head()  # preview only; has a visible effect just in an interactive session

y = df['status']
x = df.drop('status', axis=1)
# 70/30 hold-out split with a fixed seed so repeated runs use the same rows.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2018)

# Fit every model once with its default hyper-parameters. These fitted
# estimators are the "default" baselines that the tuned models are compared
# against later in the file.

# The solver is pinned to 'liblinear' because it supports both the 'l1' and
# 'l2' penalties. The library default changed to 'lbfgs' in scikit-learn 0.22,
# and 'lbfgs' rejects 'l1', which would make every 'l1' candidate of the
# penalty grid searched for this model fail to fit.
lr = LogisticRegression(solver='liblinear')
lr.fit(x_train, y_train)

# NOTE(review): probability=True makes every SVC fit considerably slower;
# confirm predict_proba is really needed for this model before keeping it.
svm = SVC(probability=True)
svm.fit(x_train, y_train)

dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)

gbdt = GradientBoostingClassifier()
gbdt.fit(x_train, y_train)

rfc = RandomForestClassifier()
rfc.fit(x_train, y_train)

gbm = LGBMClassifier()
gbm.fit(x_train, y_train)

xgbc = XGBClassifier()
xgbc.fit(x_train, y_train)

def gride_search(model, para):
    """Grid-search ``para`` for ``model`` and print a before/after comparison.

    A 5-fold cross-validated ``GridSearchCV`` scored by accuracy is fitted on
    the module-level ``x_train`` / ``y_train``. The refitted search object and
    the ``model`` passed in are then both scored on the module-level
    ``x_test`` and their ROC AUC against ``y_test`` is printed.

    Args:
        model: Estimator to tune. It must already be fitted, because its own
            scores on ``x_test`` are reported as the default-parameter
            baseline.
        para: Parameter grid handed to ``GridSearchCV``.

    Returns:
        None. All results are written to standard output.
    """
    searcher = GridSearchCV(model, para, cv=5, scoring='accuracy').fit(x_train, y_train)

    # Prefer the raw decision values when the estimator exposes them;
    # otherwise fall back to the probability of the second class.
    has_margin = hasattr(model, 'decision_function')

    def _test_scores(estimator):
        if has_margin:
            return estimator.decision_function(x_test)
        return estimator.predict_proba(x_test)[:, 1]

    tuned_scores = _test_scores(searcher)
    default_scores = _test_scores(model)

    print('参数调整前后对比:')
    print('best score:', searcher.best_score_)
    print('最优参数:', searcher.best_params_)
    print('默认参数 AUC:', roc_auc_score(y_test, default_scores))
    print('最优参数 AUC:', roc_auc_score(y_test, tuned_scores))

# One row per model: (label printed before the report, fitted baseline
# estimator, hyper-parameter grid to search). Rows run in the order listed.
search_plan = [
    ('LogisticRegression: ', lr,
     {'penalty': ['l1', 'l2'], 'C': [1e-3, 1e-2, 1e-1, 1, 10]}),
    ('svm: ', svm,
     {'C': [1e-3, 1e-2, 1e-1, 1], 'kernel': ['linear', 'sigmoid']}),
    ('DecisionTreeClassifier: ', dt,
     {'criterion': ['gini', 'entropy'], 'splitter': ['best', 'random'],
      'max_depth': range(3, 10, 3), 'max_features': ['sqrt', 'log2', None]}),
    ('GradientBoostingClassifier: ', gbdt,
     {'max_features': ['sqrt', 'log2', None], 'learning_rate': [0.01, 0.1, 1],
      'n_estimators': range(50, 200, 50)}),
    ('RandomForestClassifier: ', rfc,
     {'n_estimators': [20, 50, 100], 'criterion': ['gini', 'entropy'],
      'max_depth': range(3, 10, 3), 'max_features': ['sqrt', 'log2', None]}),
    ('lgb: ', gbm,
     {'learning_rate': [0.2, 0.5, 0.7], 'max_depth': range(1, 10, 3),
      'n_estimators': range(20, 100, 20)}),
    ('xgb: ', xgbc,
     {'n_estimators': range(50, 200, 50), 'max_depth': [3, 6, 10],
      'reg_lambda': [0.2, 0.5, 1]}),
]

# Print the model label, then tune it and report the before/after comparison.
for label, estimator, para in search_plan:
    print(label)
    gride_search(estimator, para)

LogisticRegression:
参数调整前后对比:
best score: 0.7938082356477307
最优参数: {'C': 0.1, 'penalty': 'l1'}
默认参数 AUC: 0.5674548527432631
最优参数 AUC: 0.7706461978237509
DecisionTreeClassifier:
参数调整前后对比:
best score: 0.7730688307784791
最优参数: {'criterion': 'gini', 'max_depth': 6, 'max_features': None, 'splitter': 'random'}
默认参数 AUC: 0.5751828320449022
最优参数 AUC: 0.7053561182226951
GradientBoostingClassifier:
参数调整前后对比:
best score: 0.7965133754132853
最优参数: {'learning_rate': 0.1, 'max_features': None, 'n_estimators': 50}
默认参数 AUC: 0.7623965864396524
最优参数 AUC: 0.7674720666019844
RandomForestClassifier:
参数调整前后对比:
best score: 0.7932070934776074
最优参数: {'criterion': 'gini', 'max_depth': 6, 'max_features': None, 'n_estimators': 50}
默认参数 AUC: 0.7037573158899566
最优参数 AUC: 0.7594298039706633
lgb:
参数调整前后对比:
best score: 0.7962128043282236
最优参数: {'learning_rate': 0.2, 'max_depth': 1, 'n_estimators': 60}
默认参数 AUC: 0.7574019592501017
最优参数 AUC: 0.7777234411025216
xgb:
参数调整前后对比:
best score: 0.7968139464983469
最优参数: {'max_depth': 3, 'n_estimators': 50, 'reg_lambda': 0.5}
默认参数 AUC: 0.7713634419371329
最优参数 AUC: 0.7705392632468467

SVM没跑出来!等跑出来我再加上!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值