Model hyperparameter tuning
Core code
def auto_search(model, grid_param):
    # 5-fold grid search over grid_param, scored by accuracy
    grid = GridSearchCV(model, grid_param, scoring='accuracy', cv=5)
    grid = grid.fit(x_train, y_train)
    # probability of the positive class, used for the AUC metric
    y_predict_pro = grid.predict_proba(x_test)[:, 1]
    print('best score:', grid.best_score_)
    print(grid.best_params_)
    print('test score:', grid.score(x_test, y_test))
    print('AUC:', metrics.roc_auc_score(y_test, y_predict_pro))
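All of the snippets below assume that x_train, x_test, y_train, y_test already exist and that the libraries are imported. A minimal setup sketch follows; the load_breast_cancer dataset and the 80/20 split are stand-ins for whatever data the original pipeline uses, not part of it.

import numpy as np
import xgboost as xgb
import lightgbm as lgb
from sklearn import svm, metrics
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Stand-in data: any binary-classification feature matrix / label pair will do here.
x, y = load_breast_cancer(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2018)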
Per-model automatic tuning
print('Logistic regression:')
# C: float, default 1.0; the inverse of regularization strength (a positive float). Smaller values mean stronger regularization.
# penalty: string, 'l1' or 'l2', default 'l2'; the type of regularization.
grid_param = {'C':[0.1,1,2,3],'penalty':['l1','l2']}
lr = LogisticRegression(random_state=2018, solver='liblinear')  # liblinear supports both 'l1' and 'l2'; newer scikit-learn defaults to lbfgs, which rejects 'l1'
lr.fit(x_train,y_train)
auto_search(lr, grid_param)
print('')
#SVM
print('SVM:')
# grid_param = {'C':[0.1,1,2,3],'kernel':['linear','poly','rbf']}
# svc = svm.SVC(random_state=2018)
# svc.fit(x_train,y_train)
# auto_search(svc,grid_param)
# print('')
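The SVM search is left commented out above. If you re-enable it, note that auto_search calls predict_proba, which SVC only exposes when probability=True. A runnable sketch under that assumption (the extra Platt scaling makes training noticeably slower):

grid_param = {'C': [0.1, 1, 2, 3], 'kernel': ['linear', 'poly', 'rbf']}
svc = svm.SVC(probability=True, random_state=2018)  # probability=True enables predict_proba
auto_search(svc, grid_param)
print('')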
# Decision tree
print('Decision tree:')
# note: 'auto' has been removed as a max_features value in newer scikit-learn releases; drop it there
grid_param = {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,3,4,5,6], 'splitter': ['best', 'random'],
              'max_features': ['log2', 'sqrt', 'auto']}
dt = DecisionTreeClassifier(random_state=2018)
dt.fit(x_train,y_train)
auto_search(dt,grid_param)
print('')
# Random forest
print('Random forest:')
# 199 x 3 candidates with 5-fold CV is close to 3,000 fits; expect this search to be slow
grid_param = {'n_estimators': range(1,200), 'max_features': ['log2', 'sqrt', 'auto']}
rf_clf = RandomForestClassifier(random_state=2018)
rf_clf.fit(x_train,y_train)
auto_search(rf_clf,grid_param)
print('')
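If the exhaustive random-forest grid above is too slow, RandomizedSearchCV (not used in the original) samples a fixed number of candidates from the same parameter space instead of trying them all. A sketch:

from sklearn.model_selection import RandomizedSearchCV

# Sample 30 parameter combinations instead of exhausting the full grid.
rand = RandomizedSearchCV(RandomForestClassifier(random_state=2018),
                          {'n_estimators': range(1, 200),
                           'max_features': ['log2', 'sqrt']},
                          n_iter=30, scoring='accuracy', cv=5, random_state=2018)
rand.fit(x_train, y_train)
print(rand.best_score_, rand.best_params_)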
#GBDT
print('GBDT:')
grid_param = {'n_estimators': range(1,100,10),'learning_rate': np.arange(0.1, 1, 0.1)}
gbdt = GradientBoostingClassifier(random_state=2018)
gbdt.fit(x_train,y_train)
auto_search(gbdt,grid_param)
print('')
#XGBoost
print('XGBoost:')
# 'eta' is the native XGBoost name; the sklearn wrapper exposes it as learning_rate
grid_param = {'learning_rate': np.arange(0.1, 0.5, 0.1), 'max_depth': range(1,6,1), 'min_child_weight': range(1,6,1)}
xgb_clf = xgb.XGBClassifier(random_state=2018)
xgb_clf.fit(x_train,y_train)
auto_search(xgb_clf,grid_param)
print('')
#LightGBM
print('LightGBM:')
grid_param = {'learning_rate': np.arange(0.1,0.5,0.1), 'max_depth': range(1,6,1), 'n_estimators': range(30,50,5)}
lgb_clf = lgb.LGBMClassifier(random_state=2018)
lgb_clf.fit(x_train,y_train)
auto_search(lgb_clf,grid_param)
print('')
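Every block above repeats the same pattern, so the per-model code can also be driven from a single list of (name, estimator, grid) triples. This is only a refactoring sketch, not part of the original code, and only a few of the grids are shown:

search_space = [
    ('Logistic regression', LogisticRegression(random_state=2018, solver='liblinear'),
     {'C': [0.1, 1, 2, 3], 'penalty': ['l1', 'l2']}),
    ('Decision tree', DecisionTreeClassifier(random_state=2018),
     {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 3, 4, 5, 6],
      'splitter': ['best', 'random'], 'max_features': ['log2', 'sqrt']}),
    ('GBDT', GradientBoostingClassifier(random_state=2018),
     {'n_estimators': range(1, 100, 10), 'learning_rate': np.arange(0.1, 1, 0.1)}),
]

for name, model, grid_param in search_space:
    print(name + ':')
    auto_search(model, grid_param)
    print('')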
The results are as follows:
Logistic regression:
best score: 0.7929065223925459
{'C': 0.1, 'penalty': 'l1'}
test score: 0.7841625788367204
AUC: 0.7708835404212702
SVM:
Decision tree:
best score: 0.7646528403967539
{'max_depth': 3, 'splitter': 'best', 'criterion': 'gini', 'max_features': 'log2'}
test score: 0.7491240364400841
AUC: 0.701477783689608