A Test Program for is_instance_of_1 from Boost.Lambda

This article shows how to use the is_instance_of_1 facility that ships with Boost.Lambda to test C++ types. A sample test program demonstrates how to check whether a type is an instance of a particular class template, and the article explains where this is useful in generic programming and compile-time type checking.

Boost.Lambda is a C++ library that offers a more concise and intuitive way to write function objects. A useful facility associated with it is is_instance_of_1, a compile-time trait that reports whether a type is an instantiation of a given class template taking one template parameter (the _1 suffix refers to the template's arity; sibling traits cover templates with more parameters). This article shows how to use is_instance_of_1 to test a type and provides the corresponding source code.

First, make sure the Boost library is installed and added to your project's include path. Next, we will write a test program that uses is_instance_of_1 to check whether a type is an instance of a particular class template.
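The listing below is a minimal sketch of such a program. Because is_instance_of_1 lives in a Boost detail header whose exact location and parameter order have varied across Boost releases, the sketch defines an equivalent one-template-parameter trait inline under the same name; treat it as an illustration of the technique rather than as Boost's own implementation. The Wrapper template is likewise a hypothetical stand-in for whatever class template you want to test against.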

#include <iostream>
#include <boost/lambda/lambda.hpp>
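
// is_instance_of_1<T, Tmpl>::value is true exactly when T is Tmpl<U> for
// some U. This local definition mirrors the idea behind the Boost detail
// trait of the same name; it is an illustrative stand-in, not the
// library's own code.
template <class T, template <class> class Tmpl>
struct is_instance_of_1 {
    static const bool value = false;
};

// Partial specialization: selected for any instantiation Tmpl<U>.
template <template <class> class Tmpl, class U>
struct is_instance_of_1<Tmpl<U>, Tmpl> {
    static const bool value = true;
};

// A hypothetical single-parameter class template to test against.
template <class T>
struct Wrapper {};

int main() {
    std::cout << std::boolalpha;

    // The check is purely compile-time: we inspect types, not objects.
    std::cout << "Wrapper<int> is an instance of Wrapper: "
              << is_instance_of_1<Wrapper<int>, Wrapper>::value << std::endl;
    std::cout << "double is an instance of Wrapper: "
              << is_instance_of_1<double, Wrapper>::value << std::endl;

    return 0;
}

Compiled with any C++98-or-later compiler, this prints true for Wrapper<int> and false for double (the Boost.Lambda header is not strictly required by the sketch itself). This is exactly the kind of question a library such as Boost.Lambda must answer internally, for example when deciding whether an operand in an expression is already a lambda functor or still needs to be wrapped.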