import numpy as np
import xgboost as xgb
### Load data and set up training parameters.
# NOTE(review): the original referenced an undefined `basePath` prefix
# (NameError). Assume the `data/` directory sits next to this script —
# confirm the path against the repository layout.
dtrain = xgb.DMatrix('data/agaricus.txt.train')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
num_round = 2

print('running cross validation')
# 5-fold cross validation; each round prints
#   [iteration] metric_name:mean_value+std_value
# where std_value is the standard deviation of the metric across folds.
# metrics: evaluation metric for the held-out fold (default: rmse for
# regression, error for classification).
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'error'}, seed=0,
       callbacks=[xgb.callback.print_evaluation(show_stdv=True)])
print('running cross validation, disable standard deviation display')
# 5-fold cross validation for num_boost_round=10 boosting iterations; each
# round prints "[iteration] metric_name:mean_value" (no std-dev column).
# early_stop(3) halts training once test-error has not improved for
# 3 consecutive rounds.
res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5,
             metrics={'error'}, seed=0,
             callbacks=[xgb.callback.print_evaluation(show_stdv=False),
                        xgb.callback.early_stop(3)])
# Sample output (was pasted into the script as bare lines — kept as comments):
# [0] train-error:0.0506682 test-error:0.0557316
# Multiple eval metrics have been passed: 'test-error' will be used for early stopping.
# Will train until test-error hasn't improved in 3 rounds.
# [1] train-error:0.0213034 test-error:0.0211884
# [2] train-error:0.0099418 test-error:0.0099786
# [3] train-error:0.0141256 test-error:0.0144336
# [4] train-error:0.0059878 test-error:0.0062948
# [5] train-error:0.0020344 test-error:0.0016886
# [6] train-error:0.0012284 test-error:0.001228
# [7] train-error:0.0012284 test-error:0.001228
# [8] train-error:0.0009212 test-error:0.001228
# [9] train-error:0.0006142 test-error:0.001228
# Stopping. Best iteration:
# [6] train-error:0.0012284+0.000260265 test-error:0.001228+0.00104094
print('running cross validation, with preprocessing function')
# Preprocessing function run on each CV fold: it receives
# (dtrain, dtest, param) and returns transformed versions of them.
# Use it for per-fold weight rescaling etc.; here it sets scale_pos_weight.
def fpreproc(dtrain, dtest, param):
    """Set scale_pos_weight from the training-fold label balance.

    scale_pos_weight balances positive and negative weights, which is
    useful for imbalanced classes; a typical value is
    sum(negative instances) / sum(positive instances).

    Returns the (dtrain, dtest, param) triple with param updated in place.
    """
    label = dtrain.get_label()
    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
    param['scale_pos_weight'] = ratio
    return (dtrain, dtest, param)
# Cross validate with the preprocessing hook: for each fold, (dtrain, dtest,
# param) are passed through fpreproc and its return values are used to
# produce that fold's results.  metric 'auc' = area under the ROC curve.
cv_kwargs = dict(metrics={'auc'}, seed=0, fpreproc=fpreproc)
xgb.cv(param, dtrain, num_round, nfold=5, **cv_kwargs)
### Cross validation with a customized loss function.
# You can also run cross validation with a custom objective —
# see custom_objective.py for a full example.
print('running cross validation, with customized loss function')