Parameter Tuning in XGBoost

This post uses an XGBoost classifier on the Hastie dataset to walk through parameter tuning with GridSearchCV, adjusting the key parameters max_depth, min_child_weight, gamma, subsample, colsample_bytree, reg_alpha, n_estimators, and learning_rate step by step, with the goal of improving the model's AUC and accuracy.


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.datasets import make_hastie_10_2
from xgboost.sklearn import XGBClassifier

## Load the example dataset (10 features)
X, y = make_hastie_10_2(random_state=0)
y = (y > 0).astype(int)  # remap labels from {-1, +1} to {0, 1}; recent xgboost versions reject negative class labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)  # test_size: fraction held out for testing
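
make_hastie_10_2 generates 12,000 samples with 10 standard-normal features and ±1 labels (remapped to 0/1 above). A quick sanity check:

# Sanity check: 12,000 samples, 10 features, binary labels
print(X.shape, y.shape)           # (12000, 10) (12000,)
print(sorted(set(y.tolist())))    # [0, 1]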

Baseline: default XGBoost parameters

auc_Score = []
accuracy = []
clf = XGBClassifier()
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)               # hard class predictions
y_pro = clf.predict_proba(X_test)[:, 1]   # probability of the positive class
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
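
The single split above yields one point estimate; a cross-validated baseline on the training set is a steadier reference before tuning starts. A minimal sketch using scikit-learn's cross_val_score with the same 5-fold, roc_auc setup as the grid searches below:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated AUC of the default XGBClassifier, matching
# the cv=5 / scoring='roc_auc' setup of the grid searches below
cv_auc = cross_val_score(XGBClassifier(), X_train, y_train,
                         scoring='roc_auc', cv=5, n_jobs=4)
print("CV AUC: %.4f +/- %.4f" % (cv_auc.mean(), cv_auc.std()))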

Step 1: Tune max_depth and min_child_weight

param_test1 = {
    'max_depth': range(3, 10),
    'min_child_weight': range(1, 12)
}
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=5,
                            min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test1, scoring='roc_auc', n_jobs=4, cv=5)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)
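
The old grid_scores_ attribute was removed in scikit-learn 0.20; its replacement, cv_results_, holds the full grid. To see more than the single best combination, e.g. with pandas (assuming it is installed):

import pandas as pd

# The full grid as a table, sorted by mean cross-validated AUC
results = pd.DataFrame(gsearch1.cv_results_)
print(results[['params', 'mean_test_score', 'std_test_score']]
      .sort_values('mean_test_score', ascending=False).head())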


Step 2: Tune gamma (carrying over max_depth=9 and min_child_weight=5 from step 1)

param_test2 = {
    'gamma': [i / 10.0 for i in range(0, 5)]
}
gsearch2 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test2, scoring='roc_auc', n_jobs=4, cv=5)
gsearch2.fit(X_train, y_train)
auc_Score.append(gsearch2.best_score_)
print(gsearch2.best_params_, gsearch2.best_score_)
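
If the best gamma lands at the edge of the searched range, a finer pass around it can pay off. A hypothetical follow-up (the values are illustrative; gsearch2.best_estimator_ carries the settings found so far):

# Hypothetical finer pass around the gamma found above
param_test2b = {'gamma': [0.25, 0.3, 0.35, 0.4]}
gsearch2b = GridSearchCV(estimator=gsearch2.best_estimator_,
                         param_grid=param_test2b,
                         scoring='roc_auc', n_jobs=4, cv=5)
gsearch2b.fit(X_train, y_train)
print(gsearch2b.best_params_, gsearch2b.best_score_)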


Step 3: Tune subsample and colsample_bytree (with gamma=0.3 from step 2)

param_test3 = {
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)]
}
gsearch3 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test3, scoring='roc_auc', n_jobs=4, cv=5)
gsearch3.fit(X_train, y_train)
auc_Score.append(gsearch3.best_score_)
print(gsearch3.best_params_, gsearch3.best_score_)
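
When two parameters interact like this, the grid grows multiplicatively, so refining on a finer 0.05 step gets expensive; RandomizedSearchCV is a cheaper alternative. A sketch (the sampled ranges are illustrative):

import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Randomized refinement: try 10 (subsample, colsample_bytree) pairs
# drawn from finer 0.05-step grids instead of every combination
rsearch3 = RandomizedSearchCV(
    estimator=gsearch3.best_estimator_,
    param_distributions={'subsample': np.arange(0.6, 1.0, 0.05),
                         'colsample_bytree': np.arange(0.6, 1.0, 0.05)},
    n_iter=10, scoring='roc_auc', n_jobs=4, cv=5, random_state=27)
rsearch3.fit(X_train, y_train)
print(rsearch3.best_params_, rsearch3.best_score_)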


Step 4: Tune reg_alpha (with colsample_bytree=0.7 from step 3)

param_test4 = {
    'reg_alpha': [1e-05, 0.001, 0.005, 0.01, 0.05, 1, 100]
}
gsearch4 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test4, scoring='roc_auc', n_jobs=4, cv=5)
gsearch4.fit(X_train, y_train)
auc_Score.append(gsearch4.best_score_)
print(gsearch4.best_params_, gsearch4.best_score_)
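
reg_alpha is the L1 penalty on leaf weights, which is why its candidates span several orders of magnitude; numpy can build such a log-spaced grid directly. A small sketch:

import numpy as np

# Log-spaced reg_alpha candidates, one per decade from 1e-5 to 1e2
param_test4b = {'reg_alpha': np.logspace(-5, 2, 8)}
print(param_test4b['reg_alpha'])
# [1.e-05 1.e-04 1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02]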


Step 5: Tune n_estimators (with reg_alpha=1e-05 from step 4)

param_test5 = {
    'n_estimators': [100, 140, 200, 500, 1000, 1500]
}
gsearch5 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            reg_alpha=1e-05, objective='binary:logistic', nthread=4,
                            scale_pos_weight=1, seed=27),
    param_grid=param_test5, scoring='roc_auc', n_jobs=4, cv=5)
gsearch5.fit(X_train, y_train)
auc_Score.append(gsearch5.best_score_)
print(gsearch5.best_params_, gsearch5.best_score_)
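
Grid-searching n_estimators gets expensive at large values; the native xgboost.cv API can instead pick the tree count in a single run via early stopping. A sketch assuming the xgboost package is importable as xgb (eta and alpha are the native names for learning_rate and reg_alpha):

import xgboost as xgb

# Let early stopping choose the tree count: stop once the
# cross-validated AUC has not improved for 50 rounds
dtrain = xgb.DMatrix(X_train, label=y_train)
params = {'eta': 0.1, 'max_depth': 9, 'min_child_weight': 5,
          'gamma': 0.3, 'subsample': 0.8, 'colsample_bytree': 0.7,
          'alpha': 1e-05, 'objective': 'binary:logistic',
          'eval_metric': 'auc'}
cv_res = xgb.cv(params, dtrain, num_boost_round=1500, nfold=5,
                early_stopping_rounds=50, seed=27)
print(len(cv_res))  # rounds kept, i.e. the suggested n_estimators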


Step 6: Tune learning_rate (with n_estimators=200 from step 5)

param_test6 = {
    'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.3]
}
gsearch6 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=200, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            reg_alpha=1e-05, objective='binary:logistic', nthread=4,
                            scale_pos_weight=1, seed=27),
    param_grid=param_test6, scoring='roc_auc', n_jobs=4, cv=5)
gsearch6.fit(X_train, y_train)
auc_Score.append(gsearch6.best_score_)
print(gsearch6.best_params_, gsearch6.best_score_)
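
learning_rate and n_estimators trade off against each other: roughly, halving the rate needs about twice the trees. If compute allows, searching them jointly avoids fixing one while tuning the other; a hypothetical sketch:

# Hypothetical joint search over the learning_rate / n_estimators trade-off
param_test6b = {
    'learning_rate': [0.05, 0.1, 0.2],
    'n_estimators': [100, 200, 400],
}
gsearch6b = GridSearchCV(estimator=gsearch6.best_estimator_,
                         param_grid=param_test6b,
                         scoring='roc_auc', n_jobs=4, cv=5)
gsearch6b.fit(X_train, y_train)
print(gsearch6b.best_params_, gsearch6b.best_score_)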


Finally: refit with the best parameters

# Best parameters found by the searches above
clf = XGBClassifier(
    learning_rate=0.1,            # default is 0.3
    n_estimators=200,             # number of trees
    max_depth=9,
    min_child_weight=5,
    gamma=0.3,
    subsample=0.8,
    colsample_bytree=0.7,
    objective='binary:logistic',  # logistic loss for binary classification
    nthread=4,                    # number of CPU threads
    reg_alpha=1e-05,
    scale_pos_weight=1,
    seed=27)                      # random seed
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)
y_pro = clf.predict_proba(X_test)[:, 1]
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
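
The feature-importance figure below can be reproduced with xgboost's built-in plotting helper (assuming matplotlib is installed):

import matplotlib.pyplot as plt
from xgboost import plot_importance

# Per-feature importance of the tuned model (default
# importance_type='weight': how often each feature is split on)
plot_importance(clf)
plt.show()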

[Figure: feature importance of the tuned model]

Parameter guide: https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
Code: https://github.com/sleepingxin/code/blob/master/xgbt调参.ipynb
Reference: https://github.com/lytforgood/MachineLearningTrick/blob/master/xgboost调参演示.md
