xgboost的原理:
参考链接:xgboost原理介绍
1.准备数据集
这里使用的是经典的乳腺癌数据集,共568条数据,30个特征。
乳腺癌数据集:breast_cancer.csv
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from xgboost import plot_importance
from xgboost import plot_tree
from sklearn import metrics
# 1. Prepare the training and test sets.
# Load the breast-cancer CSV into a DataFrame; the relative path is resolved
# against the current working directory, not this file's location.
df = pd.read_csv('../data/data/breast_cancer.csv')
#label
print(df.loc[