Test Programs for the arg_to_python Converter in Boost.Python

This article describes how Boost.Python's arg_to_python converter is used to turn C++ objects into Python objects, and walks through example code showing a converter implementation together with a test program that verifies the conversion is correct.

When working with Boost.Python, we frequently need to convert C++ objects into Python objects, and this is where the arg_to_python converter comes in. To guarantee that the conversion is correct and reliable, we should verify it with a test program.

Below is an example program. Suppose we have a class MyClass whose instances we need to convert to Python objects. First, here is the class definition; the converter implementation follows it:

#include <boost/python.hpp>

class MyClass {
public:
    explicit MyClass(int v) : value(v) {}
    int value;
};
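
Next we implement the converter itself. In Boost.Python's public API, a custom C++-to-Python conversion is registered through boost::python::to_python_converter, which takes a struct exposing a static convert function; internally, Boost.Python routes such conversions through its arg_to_python machinery. The sketch below assumes Python 3 and chooses, purely for illustration, to represent a MyClass on the Python side as a plain Python int carrying value; the struct name MyClassToPython is our own:

struct MyClassToPython {
    // Called by Boost.Python whenever a MyClass has to cross into Python.
    // Must return a new reference to the resulting Python object.
    static PyObject* convert(const MyClass& obj) {
        // Illustrative choice: represent MyClass as a plain Python int
        // holding `value` (Python 3 C API).
        return PyLong_FromLong(obj.value);
    }
};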
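With the converter defined, a self-contained way to test it is to embed the interpreter, register the converter, and check that a MyClass value round-trips. The program below is a minimal sketch under the same assumptions; in a real extension module the registration call would live inside BOOST_PYTHON_MODULE rather than main. Compile and link against the Boost.Python and Python libraries as usual.

#include <iostream>

int main() {
    Py_Initialize();

    // Register the converter with Boost.Python's global registry.
    boost::python::to_python_converter<MyClass, MyClassToPython>();

    // Constructing a boost::python::object from a C++ value goes through the
    // arg_to_python machinery, which looks up the converter we just registered.
    boost::python::object pyobj(MyClass(42));

    // Pull the value back out to confirm the conversion produced the expected int.
    int round_tripped = boost::python::extract<int>(pyobj);
    std::cout << (round_tripped == 42 ? "conversion OK" : "conversion FAILED")
              << std::endl;
    return 0;
}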
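The same converter also kicks in when a wrapped function returns MyClass by value. As a hypothetical module (the names myclass_ext and make_myclass are placeholders of our own):

MyClass make_myclass() { return MyClass(42); }

BOOST_PYTHON_MODULE(myclass_ext) {
    // Registering at import time makes the converter available to all
    // wrapped functions in the module.
    boost::python::to_python_converter<MyClass, MyClassToPython>();
    boost::python::def("make_myclass", make_myclass);
}

After building the module, running import myclass_ext; print(myclass_ext.make_myclass()) from Python should print 42, confirming that the registered converter handled the return value.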