一周算法实践day2:集成模型构建

1 任务

  • 构建随机森林、GBDT、XGBoost和LightGBM这4个模型,评分方式任意。

2 遇到的问题

安装 XGBoost 的 GPU 版本时遇到的问题

3 完整代码和注释

from __future__ import print_function

import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from xgboost import XGBClassifier


# Load the full dataset; the 'status' column is used as the classification
# target and every other column as a feature.
data_all = pd.read_csv('../data_all.csv')
print("数据行列数:", data_all.shape)
# To inspect the column names, print data_all.keys().


# Split into train/test sets BEFORE standardizing. Fitting the scaler on the
# training rows only keeps test-set statistics (mean/variance) from leaking
# into the features the models are trained on.
df_y = data_all['status']
df_X = data_all.drop(columns=['status']).astype("float")
X_train, X_test, y_train, y_test = train_test_split(
    df_X, df_y, test_size=0.3, random_state=2018)

# Standardize each feature column to zero mean / unit variance using the
# training split's statistics, then apply the same transform to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("训练集数量:", X_train.shape[0], "测试集数量:", X_test.shape[0])
print("各类样本数量:\n", y_train.value_counts())

# Fit the four ensemble classifiers on the training split. Every estimator is
# seeded with the same random_state so repeated runs are comparable.
rfc = RandomForestClassifier(n_estimators=100, random_state=2018).fit(X_train, y_train)
gbc = GradientBoostingClassifier(random_state=2018).fit(X_train, y_train)
xgbc = XGBClassifier(random_state=2018).fit(X_train, y_train)
lgbc = LGBMClassifier(random_state=2018).fit(X_train, y_train)

# Score each fitted model on the held-out test split (classifier `score`
# reports accuracy), in the same order the models were trained.
rfc_score, gbc_score, xgbc_score, lgbc_score = (
    clf.score(X_test, y_test) for clf in (rfc, gbc, xgbc, lgbc)
)

# Report the test accuracies, two models per line.
print("RandomForestClassifier Acc: %f, GradientBoostingClassifier Acc: %f"
      % (rfc_score, gbc_score))
print("XGBClassifier Acc: %f, LGBMClassifier Acc: %f"
      % (xgbc_score, lgbc_score))
# Output recorded by the author from a previous run:
# RandomForestClassifier Acc: 0.782761, GradientBoostingClassifier Acc: 0.780659
# XGBClassifier Acc: 0.784163, LGBMClassifier Acc: 0.772249
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值