xgboost中C-api使用方法
1.如果数据集在文件中可用,则可以使用XGDMatrixCreateFromFile将其加载到DMatrix对象中
// Handle that will receive the DMatrix loaded from disk
DMatrixHandle data;
// Flag forwarded to XGDMatrixCreateFromFile: 0 lets XGBoost print its loading
// messages, a non-zero value silences them
const int silent = 0;
// Load the data from file & store it in the 'data' handle
safe_xgboost(XGDMatrixCreateFromFile("/path/to/file/filename", silent, &data));
2.也可以使用XGDMatrixCreateFromMat函数从二维矩阵创建DMatrix对象。
// 1D matrix; stored as float because XGDMatrixCreateFromMat takes a 'const float *'
const float data1[] = { 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 };

// 2D matrix. The dimensions are enum constants because an array whose bound is
// a 'const int' variable is a variable-length array in C and cannot take an
// initializer. ROWS matches the six rows listed in the initializer.
enum { ROWS = 6, COLS = 3 };
const float data2[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };
DMatrixHandle dmatrix1, dmatrix2;
// Pass the matrix together with the number of rows & columns it contains;
// here 0 represents the missing value in the matrix dataset.
// dmatrix1 receives the DMatrix created from data1 (1 row, 50 columns)
safe_xgboost(XGDMatrixCreateFromMat(data1, 1, 50, 0, &dmatrix1));
// here -1 represents the missing value in the matrix dataset;
// &data2[0][0] yields the 'const float *' view of the 2D array
safe_xgboost(XGDMatrixCreateFromMat(&data2[0][0], ROWS, COLS, -1, &dmatrix2));
3.使用XGBoosterCreate创建Booster对象,用于在数据集上进行训练和测试。
// Handle that will receive the newly created booster
BoosterHandle booster;
// Number of matrices handed to the booster. An enum constant is used so that
// it has a defined value and can size an initialized array (a 'const int'
// bound would make eval_dmats a variable-length array, which cannot be initialized).
enum { eval_dmats_size = 2 };
// We assume that training and test data have been loaded into 'train' and 'test'
DMatrixHandle eval_dmats[eval_dmats_size] = {train, test};
safe_xgboost(XGBoosterCreate(eval_dmats, eval_dmats_size, &booster));
4.对于每个DMatrix对象,使用XGDMatrixSetFloatInfo设置标签。稍后可以使用XGDMatrixGetFloatInfo访问标签。
// Matrix dimensions as enum constants: they are valid bounds for initialized
// arrays, and ROWS matches the six rows listed in the initializer below
enum { ROWS = 6, COLS = 3 };
// float storage, because XGDMatrixCreateFromMat takes a 'const float *'
const float data[ROWS][COLS] = { {1, 2, 3}, {2, 4, 6}, {3, -1, 9}, {4, 8, -1}, {2, 5, 1}, {0, 1, 5} };
DMatrixHandle dmatrix;
// -1 marks the missing values in the matrix
safe_xgboost(XGDMatrixCreateFromMat(&data[0][0], ROWS, COLS, -1, &dmatrix));
// variable to store labels for the dataset created from above matrix (one per row)
float labels[ROWS];
for (int i = 0; i < ROWS; i++) {
    labels[i] = (float)i;
}
// Loading the labels
safe_xgboost(XGDMatrixSetFloatInfo(dmatrix, "label", labels, ROWS));
// reading the labels back: result_len receives the number of values returned
bst_ulong result_len;
// labels result
const float *result;
safe_xgboost(XGDMatrixGetFloatInfo(dmatrix, "label", &result_len, &result));
// The index has the same type as result_len, and is cast to unsigned long so
// that the printf argument matches %lu
for (bst_ulong i = 0; i < result_len; i++) {
    printf("label[%lu] = %f\n", (unsigned long)i, result[i]);
}
5.使用XGBoosterSetParam根据要求设置booster对象的参数(参数可查看官方文档)。
// 'booster' must be the BoosterHandle that was already initialized with
// XGBoosterCreate; declaring a fresh handle here would pass an uninitialized
// value to XGBoosterSetParam.
// Parameter names and values are both passed as strings.
safe_xgboost(XGBoosterSetParam(booster, "booster", "gblinear"));
// default max_depth = 6
safe_xgboost(XGBoosterSetParam(booster, "max_depth", "3"));
// default eta = 0.3
safe_xgboost(XGBoosterSetParam(booster, "eta", "0.1"));
6.分别使用XGBoosterUpdateOneIter和XGBoosterEvalOneIter对模型进行训练和评估。
// Number of boosting rounds to run
int num_of_iterations = 20;
// Display names for the evaluation matrices, listed in the same order as
// eval_dmats. The array bound is left to the initializer: sizing it with a
// non-constant would make it a variable-length array, which cannot be initialized.
const char *eval_names[] = {"train", "test"};
// Receives the evaluation string written by XGBoosterEvalOneIter
const char *eval_result = NULL;
for (int i = 0; i < num_of_iterations; ++i) {
    // Update the model performance for each iteration
    safe_xgboost(XGBoosterUpdateOneIter(booster, i, train));
    // Give the statistics for the learner for training & testing dataset in terms of error after each iteration
    safe_xgboost(XGBoosterEvalOneIter(booster, i, eval_dmats, eval_names, eval_dmats_size, &eval_result));
    printf("%s\n", eval_result);
}
ps:对于自定义的损失函数,可以使用XGBoosterBoostOneIter函数,并手动指定梯度和二阶梯度。
7.使用XGBoosterPredict在测试集上预测结果
// Receives the number of predictions written to output_result
bst_ulong output_length;
// Receives a pointer to the prediction values
const float *output_result;
// NOTE(review): newer XGBoost releases declare XGBoosterPredict with an extra
// 'training' argument before the output pointers — confirm against the
// installed c_api.h
safe_xgboost(XGBoosterPredict(booster, test, 0, 0, &output_length, &output_result));
// The index has the same type as output_length, and is cast to unsigned long
// so that the printf argument matches %lu
for (bst_ulong i = 0; i < output_length; i++) {
    printf("prediction[%lu] = %f \n", (unsigned long)i, output_result[i]);
}
8.使用XGDMatrixFree和XGBoosterFree释放代码中使用的所有内部结构。此步骤对于防止内存泄漏非常重要。
// Release the DMatrix behind the 'dmatrix' handle.
// NOTE(review): only this one handle is freed here; any other DMatrix handle
// created earlier presumably needs its own XGDMatrixFree call — confirm.
safe_xgboost(XGDMatrixFree(dmatrix));
// Release the booster behind the 'booster' handle
safe_xgboost(XGBoosterFree(booster));
9.使用XGBoosterGetNumFeature获取数据集中的特征数量。
// Assumes 'booster' is a BoosterHandle that is already declared, with a
// dataset loaded and trained on it
bst_ulong feature_count = 0;
// XGBoosterGetNumFeature writes the number of features into feature_count
safe_xgboost(XGBoosterGetNumFeature(booster, &feature_count));
// bst_ulong is converted to unsigned long so the argument matches %lu
printf("num_feature: %lu\n", (unsigned long)feature_count);
10.使用XGBoosterLoadModel函数加载模型
// Path of the model file to load
const char *saved_model_path = "/path/of/model";
// A booster handle has to be created before a model can be loaded into it
BoosterHandle booster;
safe_xgboost(XGBoosterCreate(NULL, 0, &booster));
// set the model parameters here
// load the model found at saved_model_path into the booster
safe_xgboost(XGBoosterLoadModel(booster, saved_model_path));
// run predictions with the loaded model here