python-autosklearn-多进程

本文详细介绍了如何利用Python库autosklearn进行自动化机器学习,并着重探讨了如何配置和实现多进程并行化,以提高模型训练的效率。通过实例展示,读者将了解到如何有效地分配计算资源,提升实验效果。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import multiprocessing
import shutil
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
from autosklearn.metrics import accuracy
from autosklearn.classification import AutoSklearnClassifier
from autosklearn.constants import MULTICLASS_CLASSIFICATION

# Working directories for this example. Both are removed below if they
# already exist.
tmp_folder = '/tmp/autosklearn_parallel_2_example_tmp'
output_folder = '/tmp/autosklearn_parallel_2_example_out'


for dir_ in (tmp_folder, output_folder):
    try:
        # Recursively delete the folder together with every sub-folder and
        # file it contains.
        shutil.rmtree(dir_)
    except OSError:
        # Best-effort cleanup: a directory that is missing or cannot be
        # removed is simply skipped.
        continue


def get_spawn_classifier(X_train, y_train):
    """Define the nested ``spawn_classifier`` worker function.

    ``X_train`` and ``y_train`` are accepted but not referenced by the code
    visible here.

    NOTE(review): in this copy the nested function contains nothing after
    its docstring and this function has no ``return`` statement, so calling
    it yields ``None``. The rest of the body appears to be missing --
    confirm against the original example before relying on it.
    """
    def spawn_classifier(seed, dataset_name):
        """Spawn a subprocess.

        auto-sklearn does not take care of spawning worker processes. This
        function, which is called several times in the main block is a new
        process which runs one instance of auto-sklearn.
        """

        # Use the initial configurations from meta-learning only in one out of
        # the four processes
from autosklearn.regression import AutoSklearnRegressor from autosklearn.classification import AutoSklearnClassifier from sklearn.cluster import KMeans, SpectralClustering, AgglomerativeClustering from sklearn.feature_selection import RFECV from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score import numpy as np import pandas as pd import time from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, accuracy_score, precision_score, recall_score, r2_score, mean_absolute_error, mean_squared_error from sklearn.preprocessing import StandardScaler from sklearn.model_selection import cross_val_score from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor from xgboost import XGBClassifier, XGBRegressor from sklearn.svm import SVC from lightgbm import LGBMClassifier, LGBMRegressor from bayes_opt import BayesianOptimization from sklearn.utils import resample df_fire = pd.read_csv("gds_fire_30mm.csv") df_nofire = pd.read_csv("gds_nofire_30mm.csv") df = pd.concat([df_fire,df_nofire]) features = ['system:index', 'days', 'evisum', 'ndwisum', 'ndvisum', 'lstsum', '.geo', 'time'] df = df.drop(features, axis = 1) df = df[df.apply(lambda row: row.isin([-99999.000000]).sum() == 0, axis=1)] # 加载数据集并进行标准化处理 X = df.drop(['labels'], axis = 1).values y = df['labels'].values scaler = StandardScaler() X_scaled = scaler.fit_transform(X) X_resampled, y_resampled = resample(X_scaled, y, replace=True, n_samples=len(y==1), random_state=42) X_train_resampled, X_test_resampled, y_train_resampled, y_test_resampled = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42) automl = 
AutoSklearnClassifier( time_left_for_this_task=120*5, per_run_time_limit=30, metric=autosklearn.metrics.accuracy, seed=42, resampling_strategy='cv', resampling_strategy_arguments={'folds': 5} ) automl.fit(X_train_resampled, y_train_resampled) automl.leaderboard(detailed = True, ensemble_only=False) print(automl.sprint_statistics()) 报错:[ERROR] [2025-08-01 03:56:04,284:Client-AutoML(42):7119140d-6e8b-11f0-9494-024264400002] (' Dummy prediction failed with run state StatusType.CRASHED and additional output: {\'error\': \'Result queue is empty\', \'exit_status\': "<class \'pynisher.limit_function_call.AnythingException\'>", \'subprocess_stdout\': \'\', \'subprocess_stderr\': \'Process pynisher function call:\\nTraceback (most recent call last):\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap\\n self.run()\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 108, in run\\n self._target(*self._args, **self._kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/pynisher/limit_function_call.py", line 133, in subprocess_func\\n return_value = ((func(*args, **kwargs), 0))\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/__init__.py", line 55, in fit_predict_try_except_decorator\\n return ta(queue=queue, **kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 1386, in eval_cv\\n evaluator = TrainEvaluator(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 206, in __init__\\n super().__init__(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/abstract_evaluator.py", line 215, in __init__\\n threadpool_limits(limits=1)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 794, in __init__\\n super().__init__(ThreadpoolController(), limits=limits, user_api=user_api)\\n File 
"/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 587, in __init__\\n self._set_threadpool_limits()\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 720, in _set_threadpool_limits\\n lib_controller.set_num_threads(num_threads)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 199, in set_num_threads\\n return set_num_threads_func(num_threads)\\nKeyboardInterrupt\\n\', \'exitcode\': 1, \'configuration_origin\': \'DUMMY\'}.',) [ERROR] [2025-08-01 03:56:04,284:Client-AutoML(42):7119140d-6e8b-11f0-9494-024264400002] (' Dummy prediction failed with run state StatusType.CRASHED and additional output: {\'error\': \'Result queue is empty\', \'exit_status\': "<class \'pynisher.limit_function_call.AnythingException\'>", \'subprocess_stdout\': \'\', \'subprocess_stderr\': \'Process pynisher function call:\\nTraceback (most recent call last):\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap\\n self.run()\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 108, in run\\n self._target(*self._args, **self._kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/pynisher/limit_function_call.py", line 133, in subprocess_func\\n return_value = ((func(*args, **kwargs), 0))\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/__init__.py", line 55, in fit_predict_try_except_decorator\\n return ta(queue=queue, **kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 1386, in eval_cv\\n evaluator = TrainEvaluator(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 206, in __init__\\n super().__init__(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/abstract_evaluator.py", line 215, in __init__\\n threadpool_limits(limits=1)\\n File 
"/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 794, in __init__\\n super().__init__(ThreadpoolController(), limits=limits, user_api=user_api)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 587, in __init__\\n self._set_threadpool_limits()\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 720, in _set_threadpool_limits\\n lib_controller.set_num_threads(num_threads)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 199, in set_num_threads\\n return set_num_threads_func(num_threads)\\nKeyboardInterrupt\\n\', \'exitcode\': 1, \'configuration_origin\': \'DUMMY\'}.',) Traceback (most recent call last): File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py", line 765, in fit self._do_dummy_prediction() File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py", line 489, in _do_dummy_prediction raise ValueError(msg) ValueError: (' Dummy prediction failed with run state StatusType.CRASHED and additional output: {\'error\': \'Result queue is empty\', \'exit_status\': "<class \'pynisher.limit_function_call.AnythingException\'>", \'subprocess_stdout\': \'\', \'subprocess_stderr\': \'Process pynisher function call:\\nTraceback (most recent call last):\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap\\n self.run()\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 108, in run\\n self._target(*self._args, **self._kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/pynisher/limit_function_call.py", line 133, in subprocess_func\\n return_value = ((func(*args, **kwargs), 0))\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/__init__.py", line 55, in fit_predict_try_except_decorator\\n return ta(queue=queue, **kwargs)\\n File 
"/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 1386, in eval_cv\\n evaluator = TrainEvaluator(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 206, in __init__\\n super().__init__(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/abstract_evaluator.py", line 215, in __init__\\n threadpool_limits(limits=1)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 794, in __init__\\n super().__init__(ThreadpoolController(), limits=limits, user_api=user_api)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 587, in __init__\\n self._set_threadpool_limits()\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 720, in _set_threadpool_limits\\n lib_controller.set_num_threads(num_threads)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 199, in set_num_threads\\n return set_num_threads_func(num_threads)\\nKeyboardInterrupt\\n\', \'exitcode\': 1, \'configuration_origin\': \'DUMMY\'}.',) --------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[15], line 9 1 automl = AutoSklearnClassifier( 2 time_left_for_this_task=120*5, 3 per_run_time_limit=30, (...) 7 resampling_strategy_arguments={'folds': 5} 8 ) ----> 9 automl.fit(X_train_resampled, y_train_resampled) 10 automl.leaderboard(detailed = True, ensemble_only=False) 11 print(automl.sprint_statistics()) File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/estimators.py:1448, in AutoSklearnClassifier.fit(self, X, y, X_test, y_test, feat_type, dataset_name) 1445 # remember target type for using in predict_proba later. 
1446 self.target_type = target_type -> 1448 super().fit( 1449 X=X, 1450 y=y, 1451 X_test=X_test, 1452 y_test=y_test, 1453 feat_type=feat_type, 1454 dataset_name=dataset_name, 1455 ) 1457 # After fit, a classifier is expected to define classes_ 1458 # A list of class labels known to the classifier, mapping each label 1459 # to a numerical index used in the model representation our output. 1460 self.classes_ = self.automl_.InputValidator.target_validator.classes_ File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/estimators.py:540, in AutoSklearnEstimator.fit(self, **kwargs) 538 if self.automl_ is None: 539 self.automl_ = self.build_automl() --> 540 self.automl_.fit(load_models=self.load_models, **kwargs) 542 return self File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py:2304, in AutoMLClassifier.fit(self, X, y, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models) 2293 def fit( 2294 self, 2295 X: SUPPORTED_FEAT_TYPES, (...) 
2302 load_models: bool = True, 2303 ) -> AutoMLClassifier: -> 2304 return super().fit( 2305 X, 2306 y, 2307 X_test=X_test, 2308 y_test=y_test, 2309 feat_type=feat_type, 2310 dataset_name=dataset_name, 2311 only_return_configuration_space=only_return_configuration_space, 2312 load_models=load_models, 2313 is_classification=True, 2314 ) File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py:962, in AutoML.fit(self, X, y, task, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models, is_classification) 959 except Exception as e: 960 # This will be called before the _fit_cleanup 961 self._logger.exception(e) --> 962 raise e 963 finally: 964 self._fit_cleanup() File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py:765, in AutoML.fit(self, X, y, task, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models, is_classification) 763 with self._stopwatch.time("Dummy predictions"): 764 self.num_run += 1 --> 765 self._do_dummy_prediction() 767 # == RUN ensemble builder 768 # Do this before calculating the meta-features to make sure that the 769 # dummy predictions are actually included in the ensemble even if 770 # calculating the meta-features takes very long 771 with self._stopwatch.time("Run Ensemble Builder"): File /opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/automl.py:489, in AutoML._do_dummy_prediction(self) 483 msg = ( 484 f" Dummy prediction failed with run state {status} and" 485 f" additional output: {additional_info}.", 486 ) 488 self._logger.error(msg) --> 489 raise ValueError(msg) 491 return ValueError: (' Dummy prediction failed with run state StatusType.CRASHED and additional output: {\'error\': \'Result queue is empty\', \'exit_status\': "<class \'pynisher.limit_function_call.AnythingException\'>", \'subprocess_stdout\': \'\', \'subprocess_stderr\': \'Process pynisher function call:\\nTraceback (most recent call last):\\n File 
"/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap\\n self.run()\\n File "/opt/conda/envs/mmedu/lib/python3.8/multiprocessing/process.py", line 108, in run\\n self._target(*self._args, **self._kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/pynisher/limit_function_call.py", line 133, in subprocess_func\\n return_value = ((func(*args, **kwargs), 0))\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/__init__.py", line 55, in fit_predict_try_except_decorator\\n return ta(queue=queue, **kwargs)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 1386, in eval_cv\\n evaluator = TrainEvaluator(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/train_evaluator.py", line 206, in __init__\\n super().__init__(\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/autosklearn/evaluation/abstract_evaluator.py", line 215, in __init__\\n threadpool_limits(limits=1)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 794, in __init__\\n super().__init__(ThreadpoolController(), limits=limits, user_api=user_api)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 587, in __init__\\n self._set_threadpool_limits()\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 720, in _set_threadpool_limits\\n lib_controller.set_num_threads(num_threads)\\n File "/opt/conda/envs/mmedu/lib/python3.8/site-packages/threadpoolctl.py", line 199, in set_num_threads\\n return set_num_threads_func(num_threads)\\nKeyboardInterrupt\\n\', \'exitcode\': 1, \'configuration_origin\': \'DUMMY\'}.',)
最新发布
08-02
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值