hpsklearn调参实例

最新推荐文章于 2024-05-13 09:55:26 发布

原创最新推荐文章于 2024-05-13 09:55:26 发布 · 1.3k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#hyperopt #sklearn #调参

同时被 2 个专栏收录：数据挖掘

20 篇文章

订阅专栏

机器学习

16 篇文章

订阅专栏

本文介绍如何使用 Hyperopt-sklearn 进行高效的模型选择：通过鸢尾花数据集上的分类任务和波士顿房价数据集上的回归任务，演示如何自动优化机器学习管道中的预处理步骤、模型选择及其超参数。

from __future__ import print_function

import sys

# import numpy as np
import hpsklearn
from hyperopt import tpe
from sklearn import datasets

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy module; only present in scikit-learn releases before 0.20.
    from sklearn.cross_validation import train_test_split

def test_demo_iris():
    """Step through a hyperopt-sklearn classifier search on the Iris data.

    The Iris samples are split 75/25 (``random_state=1``) into train and
    test parts. A ``HyperoptEstimator`` is then driven one evaluation at a
    time through ``fit_iter`` instead of a single ``fit`` call, so the loss
    of every trial can be reported as it completes. Once ``max_evals``
    trials exist, ``retrain_best_model_on_full_data`` is called on the
    training part and the score on the held-out part is printed.

    All progress output is written to ``sys.stderr``; nothing is returned.
    """

    def report(*parts):
        # Every message of this demo goes to stderr, not stdout.
        print(*parts, file=sys.stderr)

    dataset = datasets.load_iris()
    splits = train_test_split(
        dataset.data, dataset.target, test_size=.25, random_state=1)
    X_train, X_test, y_train, y_test = splits

    search_options = dict(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        classifier=hpsklearn.components.any_classifier('clf'),
        algo=tpe.suggest,
        trial_timeout=15.0,  # seconds
        max_evals=10,
        seed=1,
    )
    estimator = hpsklearn.HyperoptEstimator(**search_options)

    # ---- Hand-rolled equivalent of `estimator.fit()` starts here ----
    report('')
    report('====Demo classification on Iris dataset====')

    search = estimator.fit_iter(X_train, y_train)
    next(search)  # advance the generator to its first yield before send()

    trial_number = 1
    while len(estimator.trials.trials) < estimator.max_evals:
        search.send(1)  # -- try one more model
        latest_loss = estimator.trials.losses()[-1]
        report('Trial', trial_number, 'loss:', latest_loss)
        trial_number += 1
        # Optional hooks from the upstream demo, left disabled:
        # hpsklearn.demo_support.scatter_error_vs_time(estimator)
        # hpsklearn.demo_support.bar_classifier_choice(estimator)

    estimator.retrain_best_model_on_full_data(X_train, y_train)
    # ---- Hand-rolled equivalent of `estimator.fit()` ends here ----

    accuracy = estimator.score(X_test, y_test)
    report('Test accuracy:', accuracy)
    report('====End of demo====')


def test_demo_boston():
    """Step through a hyperopt-sklearn regressor search on the Boston data.

    The Boston housing samples are split 75/25 (``random_state=1``) into
    train and test parts. A ``HyperoptEstimator`` configured with
    ``any_regressor`` is driven one evaluation at a time through
    ``fit_iter`` so the loss of every trial can be reported as it
    completes. Once ``max_evals`` trials exist,
    ``retrain_best_model_on_full_data`` is called on the training part and
    the score on the held-out part is printed as "Test R2".

    All progress output is written to ``sys.stderr``; nothing is returned.

    NOTE(review): ``datasets.load_boston`` was removed in scikit-learn 1.2,
    so this demo needs an older scikit-learn -- confirm the pinned version.
    """

    def report(*parts):
        # Every message of this demo goes to stderr, not stdout.
        print(*parts, file=sys.stderr)

    dataset = datasets.load_boston()
    splits = train_test_split(
        dataset.data, dataset.target, test_size=.25, random_state=1)
    X_train, X_test, y_train, y_test = splits

    search_options = dict(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        regressor=hpsklearn.components.any_regressor('reg'),
        algo=tpe.suggest,
        trial_timeout=15.0,  # seconds
        max_evals=10,
        seed=1,
    )
    estimator = hpsklearn.HyperoptEstimator(**search_options)

    # ---- Hand-rolled equivalent of `estimator.fit()` starts here ----
    report('')
    report('====Demo regression on Boston dataset====')

    search = estimator.fit_iter(X_train, y_train)
    next(search)  # advance the generator to its first yield before send()

    trial_number = 1
    while len(estimator.trials.trials) < estimator.max_evals:
        search.send(1)  # -- try one more model
        latest_loss = estimator.trials.losses()[-1]
        report('Trial', trial_number, 'loss:', latest_loss)
        trial_number += 1
        # Optional hooks from the upstream demo, left disabled:
        # hpsklearn.demo_support.scatter_error_vs_time(estimator)
        # hpsklearn.demo_support.bar_classifier_choice(estimator)

    estimator.retrain_best_model_on_full_data(X_train, y_train)
    # ---- Hand-rolled equivalent of `estimator.fit()` ends here ----

    r2 = estimator.score(X_test, y_test)
    report('Test R2:', r2)
    report('====End of demo====')