# Import the required packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,f1_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
# Load the data
# Read the dataset from the GBK-encoded CSV in the working directory,
# using the first CSV column as the DataFrame row index.
data = pd.read_csv(
    './data.csv',
    encoding='gbk',
    index_col=0,
)
# Data understanding
# Split the table into the features X (every column except 'status') and the
# label y (the 'status' column). The original note says 88 columns remain in X;
# that count is not verifiable from this script alone.
X = data.drop(columns='status')
y = data['status']

# Show the (rows, columns) shape of X and how often each label value occurs in y.
print('X.shape:', X.shape)
print('y的分布:', y.value_counts())
<