seaborn: ValueError: List of boxplot statistics and `positions` values must have same the length

This error occurs when calling

seaborn.boxplot(data=df, x='col1', y='col2')

on a DataFrame in which the y column col2, which should hold numeric values, has dtype object rather than a numeric dtype. Converting the column to a numeric type before plotting resolves it, e.g. df['col2'] = df['col2'].astype(float) or pd.to_numeric(df['col2']). (Avoid astype(np.float): the np.float alias is deprecated and was removed in NumPy 1.24; use float, np.float64, or pd.to_numeric instead.)
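A minimal sketch reproducing the problem and the fix; the column names col1/col2 and the sample values are illustrative placeholders, not from a real dataset:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Illustrative data: the "numeric" column arrives as strings, so its dtype is object
df = pd.DataFrame({
    'col1': ['A', 'A', 'B', 'B', 'B'],
    'col2': ['1.2', '3.4', '2.5', '4.1', '3.0'],  # dtype: object, not a numeric type
})
print(df.dtypes)  # col2 is reported as object

# With an object-dtype y column, older seaborn/matplotlib versions can raise:
#   ValueError: List of boxplot statistics and `positions` values must have same the length
# sns.boxplot(data=df, x='col1', y='col2')

# Fix: convert the column to a numeric dtype before plotting
df['col2'] = df['col2'].astype(np.float64)  # or: pd.to_numeric(df['col2'], errors='coerce')

sns.boxplot(data=df, x='col1', y='col2')
plt.show()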
