Parameter Tuning in XGBoost

This post uses an XGBoost classifier on the Hastie dataset to walk through parameter tuning with GridSearchCV, adjusting the key parameters max_depth, min_child_weight, gamma, subsample, colsample_bytree, reg_alpha, n_estimators, and learning_rate step by step, with the goal of improving the model's AUC and accuracy.


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.datasets import make_hastie_10_2
from xgboost.sklearn import XGBClassifier

## Load the example dataset (10 features)
X, y = make_hastie_10_2(random_state=0)
y = (y > 0).astype(int)  # remap labels from {-1, +1} to {0, 1}; recent xgboost versions reject negative class labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)  # test_size: fraction held out for testing
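
make_hastie_10_2 generates 12,000 samples with 10 standard-normal features and ±1 labels (remapped to 0/1 above). A quick sanity check:

# Sanity check: 12,000 samples, 10 features, binary labels
print(X.shape, y.shape)           # (12000, 10) (12000,)
print(sorted(set(y.tolist())))    # [0, 1]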

Baseline: default XGBoost parameters

auc_Score = []
accuracy = []
clf = XGBClassifier()
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)               # hard class predictions
y_pro = clf.predict_proba(X_test)[:, 1]   # probability of the positive class
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
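
The single split above yields one point estimate; a cross-validated baseline on the training set is a steadier reference before tuning starts. A minimal sketch using scikit-learn's cross_val_score with the same 5-fold, roc_auc setup as the grid searches below:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated AUC of the default XGBClassifier, matching
# the cv=5 / scoring='roc_auc' setup of the grid searches below
cv_auc = cross_val_score(XGBClassifier(), X_train, y_train,
                         scoring='roc_auc', cv=5, n_jobs=4)
print("CV AUC: %.4f +/- %.4f" % (cv_auc.mean(), cv_auc.std()))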

Step 1: Tune max_depth and min_child_weight

param_test1 = {
    'max_depth': range(3, 10),
    'min_child_weight': range(1, 12)
}
gsearch1 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=5,
                            min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test1, scoring='roc_auc', n_jobs=4, cv=5)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)
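
The old grid_scores_ attribute was removed in scikit-learn 0.20; its replacement, cv_results_, holds the full grid. To see more than the single best combination, e.g. with pandas (assuming it is installed):

import pandas as pd

# The full grid as a table, sorted by mean cross-validated AUC
results = pd.DataFrame(gsearch1.cv_results_)
print(results[['params', 'mean_test_score', 'std_test_score']]
      .sort_values('mean_test_score', ascending=False).head())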


Step 2: Tune gamma (carrying over max_depth=9 and min_child_weight=5 from step 1)

param_test2 = {
    'gamma': [i / 10.0 for i in range(0, 5)]
}
gsearch2 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test2, scoring='roc_auc', n_jobs=4, cv=5)
gsearch2.fit(X_train, y_train)
auc_Score.append(gsearch2.best_score_)
print(gsearch2.best_params_, gsearch2.best_score_)
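
If the best gamma lands at the edge of the searched range, a finer pass around it can pay off. A hypothetical follow-up (the values are illustrative; gsearch2.best_estimator_ carries the settings found so far):

# Hypothetical finer pass around the gamma found above
param_test2b = {'gamma': [0.25, 0.3, 0.35, 0.4]}
gsearch2b = GridSearchCV(estimator=gsearch2.best_estimator_,
                         param_grid=param_test2b,
                         scoring='roc_auc', n_jobs=4, cv=5)
gsearch2b.fit(X_train, y_train)
print(gsearch2b.best_params_, gsearch2b.best_score_)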


Step 3: Tune subsample and colsample_bytree (with gamma=0.3 from step 2)

param_test3 = {
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)]
}
gsearch3 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.8,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test3, scoring='roc_auc', n_jobs=4, cv=5)
gsearch3.fit(X_train, y_train)
auc_Score.append(gsearch3.best_score_)
print(gsearch3.best_params_, gsearch3.best_score_)
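
When two parameters interact like this, the grid grows multiplicatively, so refining on a finer 0.05 step gets expensive; RandomizedSearchCV is a cheaper alternative. A sketch (the sampled ranges are illustrative):

import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Randomized refinement: try 10 (subsample, colsample_bytree) pairs
# drawn from finer 0.05-step grids instead of every combination
rsearch3 = RandomizedSearchCV(
    estimator=gsearch3.best_estimator_,
    param_distributions={'subsample': np.arange(0.6, 1.0, 0.05),
                         'colsample_bytree': np.arange(0.6, 1.0, 0.05)},
    n_iter=10, scoring='roc_auc', n_jobs=4, cv=5, random_state=27)
rsearch3.fit(X_train, y_train)
print(rsearch3.best_params_, rsearch3.best_score_)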


Step 4: Tune reg_alpha (with colsample_bytree=0.7 from step 3)

param_test4 = {
    'reg_alpha': [1e-05, 0.001, 0.005, 0.01, 0.05, 1, 100]
}
gsearch4 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27),
    param_grid=param_test4, scoring='roc_auc', n_jobs=4, cv=5)
gsearch4.fit(X_train, y_train)
auc_Score.append(gsearch4.best_score_)
print(gsearch4.best_params_, gsearch4.best_score_)
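
reg_alpha is the L1 penalty on leaf weights, which is why its candidates span several orders of magnitude; numpy can build such a log-spaced grid directly. A small sketch:

import numpy as np

# Log-spaced reg_alpha candidates, one per decade from 1e-5 to 1e2
param_test4b = {'reg_alpha': np.logspace(-5, 2, 8)}
print(param_test4b['reg_alpha'])
# [1.e-05 1.e-04 1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02]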


Step 5: Tune n_estimators (with reg_alpha=1e-05 from step 4)

param_test5 = {
    'n_estimators': [100, 140, 200, 500, 1000, 1500]
}
gsearch5 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            reg_alpha=1e-05, objective='binary:logistic', nthread=4,
                            scale_pos_weight=1, seed=27),
    param_grid=param_test5, scoring='roc_auc', n_jobs=4, cv=5)
gsearch5.fit(X_train, y_train)
auc_Score.append(gsearch5.best_score_)
print(gsearch5.best_params_, gsearch5.best_score_)
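
Grid-searching n_estimators gets expensive at large values; the native xgboost.cv API can instead pick the tree count in a single run via early stopping. A sketch assuming the xgboost package is importable as xgb (eta and alpha are the native names for learning_rate and reg_alpha):

import xgboost as xgb

# Let early stopping choose the tree count: stop once the
# cross-validated AUC has not improved for 50 rounds
dtrain = xgb.DMatrix(X_train, label=y_train)
params = {'eta': 0.1, 'max_depth': 9, 'min_child_weight': 5,
          'gamma': 0.3, 'subsample': 0.8, 'colsample_bytree': 0.7,
          'alpha': 1e-05, 'objective': 'binary:logistic',
          'eval_metric': 'auc'}
cv_res = xgb.cv(params, dtrain, num_boost_round=1500, nfold=5,
                early_stopping_rounds=50, seed=27)
print(len(cv_res))  # rounds kept, i.e. the suggested n_estimators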


Step 6: Tune learning_rate (with n_estimators=200 from step 5)

param_test6 = {
    'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.3]
}
gsearch6 = GridSearchCV(
    estimator=XGBClassifier(learning_rate=0.1, n_estimators=200, max_depth=9,
                            min_child_weight=5, gamma=0.3, subsample=0.8, colsample_bytree=0.7,
                            reg_alpha=1e-05, objective='binary:logistic', nthread=4,
                            scale_pos_weight=1, seed=27),
    param_grid=param_test6, scoring='roc_auc', n_jobs=4, cv=5)
gsearch6.fit(X_train, y_train)
auc_Score.append(gsearch6.best_score_)
print(gsearch6.best_params_, gsearch6.best_score_)
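
learning_rate and n_estimators trade off against each other: roughly, halving the rate needs about twice the trees. If compute allows, searching them jointly avoids fixing one while tuning the other; a hypothetical sketch:

# Hypothetical joint search over the learning_rate / n_estimators trade-off
param_test6b = {
    'learning_rate': [0.05, 0.1, 0.2],
    'n_estimators': [100, 200, 400],
}
gsearch6b = GridSearchCV(estimator=gsearch6.best_estimator_,
                         param_grid=param_test6b,
                         scoring='roc_auc', n_jobs=4, cv=5)
gsearch6b.fit(X_train, y_train)
print(gsearch6b.best_params_, gsearch6b.best_score_)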


Finally: refit with the best parameters

# Best parameters found by the searches above
clf = XGBClassifier(
    learning_rate=0.1,            # default is 0.3
    n_estimators=200,             # number of trees
    max_depth=9,
    min_child_weight=5,
    gamma=0.3,
    subsample=0.8,
    colsample_bytree=0.7,
    objective='binary:logistic',  # logistic loss for binary classification
    nthread=4,                    # number of CPU threads
    reg_alpha=1e-05,
    scale_pos_weight=1,
    seed=27)                      # random seed
clf.fit(X_train, y_train)
y_pre = clf.predict(X_test)
y_pro = clf.predict_proba(X_test)[:, 1]
print("AUC Score : %f" % metrics.roc_auc_score(y_test, y_pro))
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, y_pre))
auc_Score.append(metrics.roc_auc_score(y_test, y_pro))
accuracy.append(metrics.accuracy_score(y_test, y_pre))
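
The feature-importance figure below can be reproduced with xgboost's built-in plotting helper (assuming matplotlib is installed):

import matplotlib.pyplot as plt
from xgboost import plot_importance

# Per-feature importance of the tuned model (default
# importance_type='weight': how often each feature is split on)
plot_importance(clf)
plt.show()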

[Figure: feature importance of the tuned model]

Parameter guide: https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
Code: https://github.com/sleepingxin/code/blob/master/xgbt调参.ipynb
Reference: https://github.com/lytforgood/MachineLearningTrick/blob/master/xgboost调参演示.md
