plt.title(): placing the title below the figure

In Python's matplotlib library, plt.title() can be used to place the chart title below the plot. Setting the y parameter to a negative value, for example y=-0.2, achieves this. The technique is commonly used when the title position needs adjusting so that it does not cover the plot content.
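A minimal sketch of the idea follows; the sine data is only for illustration, and the exact y value usually needs tuning for your figure.

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(0, 2 * np.pi, 200)
plt.plot(x, np.sin(x))
# A negative y coordinate pushes the title below the axes; -0.2 is a
# common starting point but may need adjusting to clear the x tick labels.
plt.title('Sine curve with the title below the axes', y=-0.2)
plt.show()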
# 任务二:基于41维特征的多分类器对比分析(使用10折交叉验证)
# 新评价指标:Accuracy, Precision, Recall, F1-Score(宏平均)
# 使用源域数据 + 10-Fold CV,避免划分偏差
# 新增功能:保存训练好的随机森林模型供任务三使用
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
import os
import joblib  # 新增导入

warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
sns.set_style("whitegrid")

# ====================== 主程序开始 ======================
print(" 开始加载并处理特征数据...")
df = pd.read_csv('extracted_features_with_domain.csv')
source_data = df[df['domain'] == 'source'].copy()
print(f" 源域样本数: {len(source_data)}")

X = source_data.drop(columns=['filename', 'label', 'domain']).values
y = source_data['label'].values
labels = sorted(np.unique(y))
class_names = ['Normal', 'Outer Race', 'Inner Race', 'Ball']

# 标准化(仅用于需要标准化的模型)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 定义分类器
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM_RBF": SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, alpha=1e-4,
                         batch_size=32, early_stopping=True, random_state=42)
}

# 十折分层交叉验证
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

results_summary = []

print("\n" + "=" * 60)
print(" 正在进行 10-Fold Cross Validation 评估各分类器...")
print("=" * 60)

for name, model in models.items():
    print(f"\n 正在评估 {name}...")

    # 是否需要标准化?
    X_use = X_scaled if name in ["SVM_RBF", "MLP"] else X

    all_y_true, all_y_pred = [], []
    for train_idx, test_idx in cv.split(X_use, y):
        X_train_fold, X_test_fold = X_use[train_idx], X_use[test_idx]
        y_train_fold, y_test_fold = y[train_idx], y[test_idx]
        model.fit(X_train_fold, y_train_fold)
        y_pred_fold = model.predict(X_test_fold)
        all_y_true.extend(y_test_fold)
        all_y_pred.extend(y_pred_fold)

    # 计算总体 Accuracy
    acc = accuracy_score(all_y_true, all_y_pred)

    # 获取 classification report(宏平均)
    report = classification_report(all_y_true, all_y_pred, target_names=class_names,
                                   labels=labels, output_dict=True)
    precision_macro = report['macro avg']['precision']
    recall_macro = report['macro avg']['recall']
    f1_macro = report['macro avg']['f1-score']

    # 存储结果
    results_summary.append({
        'Model': name,
        'Accuracy': acc,
        'Precision': precision_macro,
        'Recall': recall_macro,
        'F1-Score': f1_macro
    })

    # 输出详细报告
    print(f" {name} 10-Fold CV 结果:")
    print(f" Accuracy = {acc:.3f}")
    print(f" Precision = {precision_macro:.3f}")
    print(f" Recall = {recall_macro:.3f}")
    print(f" F1-Score = {f1_macro:.3f}")

    # 显示分类报告
    print("\n 分类报告 (Classification Report):")
    print(classification_report(all_y_true, all_y_pred, target_names=class_names))

    # 绘制混淆矩阵热力图
    cm = confusion_matrix(all_y_true, all_y_pred, labels=labels)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - {name} (10-Fold CV)')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig(f'cm_{name.replace(" ", "_")}_10fold.png', dpi=150)
    plt.show()

# ====================== 综合对比表格 ======================
print("\n" + "=" * 60)
print(" 所有模型综合性能对比(10-Fold CV)")
print("=" * 60)

summary_df = pd.DataFrame(results_summary).round(3)
print(summary_df.to_string(index=False))

# 保存为 CSV
summary_df.to_csv('model_comparison_summary_10fold_updated.csv', index=False)
print(" 综合结果已保存至: model_comparison_summary_10fold_updated.csv")

# ====================== 可视化四项指标柱状图 ======================
metrics_plot = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
colors = ['#4E79A7', '#F28E2B', '#E15759']
x_pos = np.arange(len(models))

fig, ax = plt.subplots(figsize=(10, 6))
width = 0.2
for i, metric in enumerate(metrics_plot):
    values = summary_df[metric].values
    bars = ax.bar(x_pos + i * width, values, width, label=metric,
                  color=colors[i % len(colors)], alpha=0.8)
    # 添加数值标签
    for bar, val in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                f"{val:.3f}", ha='center', va='bottom', fontsize=9, fontweight='bold')

ax.set_xlabel('Model')
ax.set_ylabel('Score')
ax.set_title('Model Comparison on Source Domain (41D Features) - 10-Fold CV\n'
             'Metrics: Accuracy, Precision, Recall, F1-Score')
ax.set_xticks(x_pos + width * 1.5)
ax.set_xticklabels(summary_df['Model'])
ax.set_ylim(0, 1.0)
ax.legend()
ax.grid(True, axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.savefig('model_performance_comparison_4metrics.png', dpi=150)
plt.show()

# ====================== 特征重要性(仅RF)======================
if 'Random Forest' in models:
    # 训练完整随机森林模型
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X, y)  # 使用全部源域训练

    # 新增:保存模型及元数据
    model_dir = "saved_models"
    os.makedirs(model_dir, exist_ok=True)  # 创建保存目录

    # 保存核心模型文件
    joblib.dump(rf_model, os.path.join(model_dir, "random_forest_model.pkl"))

    # 保存元数据(特征名、类别名、标签等)
    metadata = {
        'feature_names': df.columns.drop(['filename', 'label', 'domain']).tolist(),
        'class_names': class_names,
        'labels': sorted(np.unique(y)),
        'preprocessing': 'none'  # 表示模型训练时未使用标准化
    }
    joblib.dump(metadata, os.path.join(model_dir, "random_forest_metadata.pkl"))
    print(" 模型及元数据已保存至 saved_models/ 目录")

    # 绘制特征重要性
    feat_importance = rf_model.feature_importances_
    feature_names = df.columns.drop(['filename', 'label', 'domain'])
    indices = np.argsort(feat_importance)[::-1][:20]

    plt.figure(figsize=(10, 6))
    plt.barh([feature_names[i] for i in indices[::-1]],
             feat_importance[indices][::-1], color='steelblue')
    plt.xlabel('Feature Importance (Gini)')
    plt.title('Top 20 Important Features - Random Forest (Full Training Set)')
    plt.gca().invert_yaxis()
    plt.grid(True, axis='x', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.savefig('rf_feature_importance_10fold.png', dpi=150)
    plt.show()

print("\n 所有任务完成!请查看生成的图表与CSV文件。")

One of this script's outputs is the figure "Top 20 Important Features - Random Forest (Full Training Set)". I would now like to generate a two-dimensional version of this figure, that is, build on it but make it more attractive and more polished. Which part of the original code produces the current one-dimensional plot? And if I only want to add a new block of code to generate the 2D figure, how should that code be added?
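For reference, the one-dimensional chart is produced by the plt.barh(...) block under the "# 绘制特征重要性" comment near the end of the script. Below is a minimal sketch of one way to add a separate 2D view without changing that block, assuming "two-dimensional" means an annotated heatmap grid of the top-20 importances. It reuses feat_importance, feature_names and indices from the existing block, so it should be appended right after it inside the same if branch; the 4x5 grid layout and the output file name rf_feature_importance_2d_heatmap.png are arbitrary choices of this sketch.

    # --- Optional 2D view of the same top-20 importances (sketch) ---
    # Arrange the top-20 Gini importances into a 4 x 5 grid and draw them as an
    # annotated seaborn heatmap; each cell shows the feature name and its value.
    top_k = 20
    grid_shape = (4, 5)  # purely a layout choice
    top_importance = feat_importance[indices[:top_k]].reshape(grid_shape)
    top_names = np.array([feature_names[i] for i in indices[:top_k]]).reshape(grid_shape)

    # Build one annotation string per cell: "name\nvalue"
    annot = np.empty(grid_shape, dtype=object)
    for r in range(grid_shape[0]):
        for c in range(grid_shape[1]):
            annot[r, c] = f"{top_names[r, c]}\n{top_importance[r, c]:.3f}"

    plt.figure(figsize=(12, 6))
    sns.heatmap(top_importance, annot=annot, fmt='', cmap='YlGnBu',
                cbar_kws={'label': 'Feature Importance (Gini)'},
                linewidths=0.5, linecolor='white',
                xticklabels=False, yticklabels=False)
    plt.title('Top 20 Important Features - Random Forest (2D Heatmap View)')
    plt.tight_layout()
    plt.savefig('rf_feature_importance_2d_heatmap.png', dpi=150)
    plt.show()

An alternative reading of "2D" would be a scatter plot of the two most important features coloured by class label; the heatmap above is only one possible interpretation.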