1 任务
- 构建随机森林、GBDT、XGBoost和LightGBM这4个模型,评分方式任意。
2 遇到的问题
安装 GPU 版本的 XGBoost 时遇到的问题
3 完整代码和注释
from __future__ import print_function

import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from xgboost import XGBClassifier
# Load the dataset. The 'status' column is the classification target; every
# other column is used as a feature and cast to float.
data_all = pd.read_csv('../data_all.csv')
print("数据行列数:",data_all.shape)
df_y = data_all['status']
raw_features = data_all.drop(columns=['status']).astype("float")

# Hold out 30% of the rows for testing. The fixed seed keeps the split
# reproducible, and splitting BEFORE scaling keeps the test rows out of the
# preprocessing statistics.
raw_train, raw_test, y_train, y_test = train_test_split(
    raw_features, df_y, test_size=0.3, random_state=2018
)

# Standardize each column to zero mean / unit variance using statistics
# learned from the training rows only, then apply that same transform to the
# test rows. Scaling the full matrix before the split (the previous approach)
# lets test-set means and variances leak into the training data.
scaler = StandardScaler().fit(raw_train)
X_train = scaler.transform(raw_train)
X_test = scaler.transform(raw_test)

# Keep df_X available as a standardized array covering all rows, now expressed
# in the training-set scale.
df_X = scaler.transform(raw_features)

print("训练集数量:", X_train.shape[0], "测试集数量:", X_test.shape[0])
print("各类样本数量:\n",y_train.value_counts())
# Build the four ensemble classifiers. They all share the same seed so that
# repeated runs give the same models; the random forest uses 100 trees and the
# other three keep their library defaults.
rfc = RandomForestClassifier(random_state=2018, n_estimators=100)
gbc = GradientBoostingClassifier(random_state=2018)
xgbc = XGBClassifier(random_state=2018)
lgbc = LGBMClassifier(random_state=2018)
classifiers = (rfc, gbc, xgbc, lgbc)

# Train every classifier on the training split. The models are independent of
# one another, so fitting them all before scoring gives the same results as
# interleaving fit and score.
for classifier in classifiers:
    classifier.fit(X_train, y_train)

# score() returns mean accuracy on the held-out test split; unpack the results
# in the same order the classifiers were listed above.
rfc_score, gbc_score, xgbc_score, lgbc_score = [
    classifier.score(X_test, y_test) for classifier in classifiers
]
# Report test-set accuracy for the four classifiers, two per output line.
# Each entry pairs a %-format template with the scores that fill it.
accuracy_report = (
    ("RandomForestClassifier Acc: %f, GradientBoostingClassifier Acc: %f",
     (rfc_score, gbc_score)),
    ("XGBClassifier Acc: %f, LGBMClassifier Acc: %f",
     (xgbc_score, lgbc_score)),
)
for template, scores in accuracy_report:
    print(template % scores)