Basic LightGBM code for common tasks

LightGBM can be applied to several task types (regression, binary classification, multiclass classification), each of which needs slightly different parameters. The basic code for each task is given below.

Regression
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,mean_squared_error
import numpy as np
import lightgbm as lgb

boston_price = datasets.load_boston()  # note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2

data = boston_price.data
target = boston_price.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))

# Convert to LightGBM Dataset format
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Parameters
params = {
    'boosting_type': 'gbdt',  # boosting type
    'objective': 'regression',  # objective function
    'metric': {'mse'},  # evaluation metric
    'num_leaves': 31,  # number of leaves
    'learning_rate': 0.05,  # learning rate
    'verbose': 1  # <0: fatal only, =0: errors (warnings), >0: info
}

# Train the model
gbm = lgb.train(params, lgb_train, num_boost_round=100, valid_sets=lgb_eval, early_stopping_rounds=5)

# Predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # shape (n_samples,)

print('mse=%.6f' % mean_squared_error(y_test, y_pred))
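Note that in LightGBM 4.0 the early_stopping_rounds and evals_result keyword arguments were removed from lgb.train; on a recent version the same behaviour is expressed with callbacks. A minimal sketch (assuming LightGBM >= 4.0):

# Equivalent training call for LightGBM >= 4.0, using callbacks
evals_result = {}
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_eval],
    callbacks=[
        lgb.early_stopping(stopping_rounds=5),  # stop when the metric stops improving for 5 rounds
        lgb.log_evaluation(period=10),          # print the eval metric every 10 rounds
        lgb.record_evaluation(evals_result),    # store eval results for later plotting
    ],
)
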
Binary classification
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,mean_squared_error
import numpy as np
from matplotlib import pyplot as plt
import lightgbm as lgb
import pickle


breast_cancer = datasets.load_breast_cancer()

data = breast_cancer.data
target = breast_cancer.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=1024)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))

# Convert to LightGBM Dataset format
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Parameters
params = {
    'boosting_type': 'gbdt',  # boosting type
    'objective': 'binary',  # objective function
    'metric': {'auc'},  # evaluation metric
    'num_leaves': 31,  # number of leaves
    'learning_rate': 0.05,  # learning rate
    'nthread': 120,  # number of threads
    'verbose': 1  # <0: fatal only, =0: errors (warnings), >0: info
}


# Train the model
evals_result = {}  # to record eval results
gbm = lgb.train(params, lgb_train, num_boost_round=100, valid_sets=lgb_eval, early_stopping_rounds=5, evals_result=evals_result)

# Predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # shape (n_samples,)

print('auc=%.6f' % roc_auc_score(y_test, y_pred))

# Feature importance plot
lgb.plot_importance(gbm)
plt.show()

# Metric plot (from the recorded eval results)
lgb.plot_metric(evals_result)
plt.show()


# Save the model with pickle
with open('model.pkl','wb') as f:
    pickle.dump(gbm,f)

# Load the model with pickle
with open('model.pkl', 'rb') as f:
    gbm = pickle.load(f)
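
Besides pickle, the Booster can also be saved in LightGBM's native text format; a minimal sketch (the file name model.txt is just an example):

# Alternative: save/load with LightGBM's native text format
gbm.save_model('model.txt', num_iteration=gbm.best_iteration)
gbm = lgb.Booster(model_file='model.txt')
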
Multiclass classification
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import numpy as np
import lightgbm as lgb
import pickle
import random
import matplotlib.pyplot as plt


np.random.seed(1024)
random.seed(1024)

digits = datasets.load_digits()

data = digits.data
target = digits.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=1024)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))

# Convert to LightGBM Dataset format
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Parameters
params = {
    'boosting_type': 'gbdt',  # boosting type
    'objective': 'multiclass',  # objective function
    'num_class': 10,  # number of classes
    'metric': {'multi_logloss'},  # evaluation metric
    'num_leaves': 31,  # number of leaves
    'learning_rate': 0.05,  # learning rate
    'feature_fraction': 0.8,  # if < 1.0, LightGBM randomly selects this fraction of features on each iteration (alias: colsample_bytree)
    'feature_fraction_seed': 1,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'bagging_seed': 3,
    'nthread': 4,
    'verbose': 1  # <0: fatal only, =0: errors (warnings), >0: info
}

# Train the model
evals_result = {}  # to record eval results
gbm = lgb.train(params, lgb_train, num_boost_round=20, valid_sets=lgb_eval, evals_result=evals_result)

# Predict: per-class probabilities
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # shape (n_samples, n_classes)

# y_pred_label = np.argmax(y_pred, axis=-1)  # shape (n_samples,)
print('best_iteration={}'.format(gbm.best_iteration))
print('auc=%.6f' % roc_auc_score(y_test, y_pred, multi_class='ovr'))

print(gbm.best_score)

# Feature importance plot
lgb.plot_importance(gbm)
plt.show()

# Metric plot
lgb.plot_metric(evals_result)
plt.show()
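
To turn the per-class probability matrix into hard class labels (and, for example, an accuracy score), a minimal sketch using scikit-learn's accuracy_score:

from sklearn.metrics import accuracy_score

# Convert per-class probabilities into predicted class labels
y_pred_label = np.argmax(y_pred, axis=-1)  # shape (n_samples,)
print('accuracy=%.6f' % accuracy_score(y_test, y_pred_label))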

A more detailed tutorial can be found at https://github.com/microsoft/LightGBM/tree/master/examples/python-guide

The examples above use the native LightGBM API; for the scikit-learn-compatible interface, see "LightGBM两种使用方式" (the two ways of using LightGBM). A short sketch of that interface follows.
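
As a minimal illustration of the scikit-learn-style interface (assuming the breast cancer split from the binary classification example; hyperparameter values are only for demonstration):

from lightgbm import LGBMClassifier

clf = LGBMClassifier(num_leaves=31, learning_rate=0.05, n_estimators=100)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])  # eval_set is optional
y_prob = clf.predict_proba(X_test)[:, 1]                # probability of the positive class
print('auc=%.6f' % roc_auc_score(y_test, y_prob))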
