import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import locale
import sys
from pathlib import Path
# Work around garbled Chinese text in matplotlib figures and console output.
try:
    # Choose a CJK-capable sans-serif font list depending on the platform.
    if os.name == 'nt':  # Windows
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
    else:  # macOS / Linux
        plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'Arial Unicode MS']
    # Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
    plt.rcParams['axes.unicode_minus'] = False
    plt.rcParams['font.size'] = 12

    # Switch the console stream to UTF-8 when it reports a different encoding.
    # The comparison is normalised so that 'UTF-8', 'utf-8' and 'utf8' all match.
    stdout_encoding = getattr(sys.stdout, 'encoding', None) or ''
    if stdout_encoding.lower().replace('-', '').replace('_', '') != 'utf8':
        try:
            sys.stdout.reconfigure(encoding='utf-8')
        except (AttributeError, ValueError, OSError):
            # Best effort only: a replaced or wrapped stdout may not support
            # reconfigure(); keep whatever encoding it already has.
            pass
    print("中文显示设置完成")
except Exception as e:
    print("中文设置错误:", e)
def enhanced_critic_weights(data,
                            target_indicators=None,
                            cost_indicators=None,
                            epsilon=1e-10):
    """Compute CRITIC weights with support for cost-type and target-type indicators.

    Args:
        data: A ``pandas.DataFrame`` or any 2-D array-like of numeric values
            (rows are samples, columns are indicators). The input is never
            modified; the function works on its own float64 copy.
        target_indicators: Optional mapping ``{column name: target value}``.
            Each listed column is replaced by ``|x - target|`` and then
            treated as a cost-type indicator.
        cost_indicators: Optional iterable of column names for which smaller
            values are better. Names that are not present are ignored.
        epsilon: Lower bound applied to each column's range so that constant
            columns do not cause a division by zero during min-max scaling.

    Returns:
        A tuple ``(weights, info)``. ``weights`` is a 1-D array with one entry
        per column. ``info`` is a dict with the keys ``columns``, ``std_dev``,
        ``conflict``, ``information``, ``target_col_indices``,
        ``cost_col_indices``, ``standardized_sums`` and ``standardized_data``.

    Notes:
        * For array input the column names are ``Var_0``, ``Var_1``, ...
        * NaN and +/-inf entries are replaced by the mean of the finite values
          in the same column; a column without any finite value is filled
          with 0.
        * Undefined correlations (constant columns) are treated as 0, so a
          constant column receives weight 0 instead of turning every weight
          into NaN.
        * If no column carries any information (total is exactly 0) the
          weights fall back to a uniform distribution.
    """
    # Both branches create a fresh float64 array. np.array (unlike np.asarray)
    # always copies, so the in-place target transform below cannot leak into
    # the caller's data.
    if isinstance(data, pd.DataFrame):
        columns = data.columns.tolist()
        X = data.values.astype(np.float64)
    else:
        X = np.array(data, dtype=np.float64)
        columns = [f"Var_{i}" for i in range(X.shape[1])]

    # Cost-type indicators: resolve names to column positions.
    cost_col_indices = []
    if cost_indicators:
        cost_col_indices = [columns.index(col) for col in cost_indicators
                            if col in columns]

    # Target-type indicators: use the absolute deviation from the target and
    # treat the result as a cost-type indicator (smaller deviation is better).
    target_col_indices = []
    if target_indicators:
        for col, target_val in target_indicators.items():
            if col not in columns:
                continue
            col_idx = columns.index(col)
            X[:, col_idx] = np.abs(X[:, col_idx] - target_val)
            if col_idx not in cost_col_indices:
                cost_col_indices.append(col_idx)
            target_col_indices.append(col_idx)

    # Replace NaN / inf by the mean of the finite values of the same column.
    finite = np.isfinite(X)
    finite_counts = finite.sum(axis=0)
    col_means = np.where(finite, X, 0.0).sum(axis=0) / np.maximum(finite_counts, 1)
    X = np.where(finite, X, col_means)

    # Basic statistics for min-max scaling.
    min_vals = X.min(axis=0)
    max_vals = X.max(axis=0)
    ranges = max_vals - min_vals
    ranges[ranges < epsilon] = epsilon

    # Min-max standardisation; cost-type columns are reversed so that larger
    # standardised values are always better.
    is_cost = np.zeros(X.shape[1], dtype=bool)
    is_cost[np.asarray(cost_col_indices, dtype=int)] = True
    Z = np.where(is_cost, max_vals - X, X - min_vals) / ranges

    standardized_sums = np.sum(Z, axis=0)

    # Contrast intensity: sample standard deviation of each standardised column.
    std_dev = np.std(Z, axis=0, ddof=1)

    # Correlation matrix. Constant columns yield NaN entries (0 / 0); those
    # are mapped to 0 so they do not contaminate the other columns.
    with np.errstate(invalid='ignore', divide='ignore'):
        corr_matrix = np.atleast_2d(np.corrcoef(Z, rowvar=False))
    corr_matrix = np.where(np.isnan(corr_matrix), 0.0, corr_matrix)

    # Conflict of each indicator with all the others.
    conflict = np.sum(1 - np.abs(corr_matrix), axis=1)

    # Information content and normalised weights.
    information = std_dev * conflict
    total_information = np.sum(information)
    if total_information == 0:
        # No indicator discriminates between samples: weight them equally.
        weights = np.full(information.shape, 1.0 / information.size)
    else:
        weights = information / total_information

    info = {
        "columns": columns,
        "std_dev": std_dev,
        "conflict": conflict,
        "information": information,
        "target_col_indices": target_col_indices,
        "cost_col_indices": cost_col_indices,
        "standardized_sums": standardized_sums,
        "standardized_data": Z
    }
    return weights, info
def stability_analysis_with_noise(data, critic_func, noise_levels=None,
                                  n_iter=100, **critic_kwargs):
    """Assess how stable the CRITIC weights are under additive Gaussian noise.

    For every noise level the data is perturbed ``n_iter`` times; each column
    receives independent zero-mean Gaussian noise whose standard deviation is
    ``noise_level`` times the (population) standard deviation of that column.
    The weights are recomputed for every perturbed data set.

    Args:
        data: ``pandas.DataFrame`` or 2-D array-like with the raw indicators.
        critic_func: Callable ``critic_func(data, **critic_kwargs)`` returning
            ``(weights, info)`` where ``info['columns']`` lists column names.
        noise_levels: Iterable of relative noise levels. Defaults to
            ``[0.01, 0.05, 0.1]`` when omitted.
        n_iter: Number of noisy replications per noise level.
        **critic_kwargs: Extra keyword arguments forwarded to ``critic_func``.

    Returns:
        dict with the keys ``noise_levels``, ``original_weights``,
        ``weight_matrix`` (noise level -> array of shape
        ``(n_iter, n_columns)``) and ``stability_stats`` (noise level ->
        DataFrame with original weight, mean, standard deviation and
        coefficient of variation per indicator).

    Side effects:
        Calls ``visualize_stability_results`` and ``save_stability_report``
        with the collected results before returning.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    noise_levels = [0.01, 0.05, 0.1] if noise_levels is None else list(noise_levels)

    # Baseline weights on the unperturbed data.
    original_weights, original_info = critic_func(data, **critic_kwargs)
    columns = original_info['columns']

    # The per-column spread of the raw data does not depend on the iteration,
    # so it is computed once. NaN entries are skipped in both branches.
    is_frame = isinstance(data, pd.DataFrame)
    if is_frame:
        noisy_columns = [col for col in columns if col in data.columns]
        column_stds = [np.std(data[col]) for col in noisy_columns]
        n_rows = len(data)
    else:
        base_array = np.array(data, dtype=np.float64)
        n_rows = base_array.shape[0]
        noisy_columns = list(range(min(len(columns), base_array.shape[1])))
        column_stds = [np.nanstd(base_array[:, j]) for j in noisy_columns]

    stability_results = {
        'noise_levels': noise_levels,
        'original_weights': original_weights,
        'weight_matrix': {},
        'stability_stats': {}
    }

    for noise_level in noise_levels:
        sampled_weights = []
        for _ in range(n_iter):
            # Work on a copy so the caller's data stays untouched.
            noisy_data = data.copy() if is_frame else base_array.copy()
            # Independent Gaussian noise for every column.
            for col, col_std in zip(noisy_columns, column_stds):
                noise = np.random.normal(0, noise_level * col_std, size=n_rows)
                if is_frame:
                    noisy_data[col] = noisy_data[col] + noise
                else:
                    noisy_data[:, col] += noise
            weights, _ = critic_func(noisy_data, **critic_kwargs)
            sampled_weights.append(weights)

        weight_matrix = np.array(sampled_weights)
        stability_results['weight_matrix'][noise_level] = weight_matrix

        # Summary statistics across the replications.
        mean_weights = np.mean(weight_matrix, axis=0)
        std_weights = np.std(weight_matrix, axis=0)
        # Guard the coefficient of variation against a zero mean weight.
        cv_weights = std_weights / np.where(mean_weights != 0, mean_weights, 1e-10)

        stability_results['stability_stats'][noise_level] = pd.DataFrame({
            '指标': columns,
            '原始权重': original_weights,
            '平均权重': mean_weights,
            '标准差': std_weights,
            '变异系数': cv_weights,
            '噪声水平': noise_level
        })

    # Plot and persist the results.
    visualize_stability_results(stability_results)
    save_stability_report(stability_results, original_info)
    return stability_results
# ... 之前的代码保持不变 ...
def _indicator_labels(results):
    """Return tick labels for the indicators, preferring their real names."""
    for stats_df in results.get('stability_stats', {}).values():
        if '指标' in stats_df:
            return list(stats_df['指标'])
    weights = results['original_weights']
    if isinstance(weights, pd.Series):
        return list(weights.index)
    return list(range(len(weights)))


def _percent_change(samples, baseline):
    """Return the change of *samples* relative to *baseline* in percent.

    A zero baseline has no meaningful relative change; NaN is returned so the
    affected series is simply left out of the plot instead of becoming inf.
    """
    if baseline == 0:
        return np.full(np.shape(samples), np.nan)
    return (samples - baseline) / baseline * 100


def visualize_stability_results(results):
    """Plot the stability analysis as a 2x2 figure, save it and show it.

    Panels: (1) box plots of the relative weight change per indicator and
    noise level, (2) heat map of the coefficient of variation, (3) box plots
    of the rank correlation between noisy and original weights, (4) mean
    absolute relative change of the three highest-weighted indicators.

    Args:
        results: dict as produced by ``stability_analysis_with_noise`` with
            the keys ``noise_levels``, ``original_weights``,
            ``weight_matrix`` and ``stability_stats``.

    Side effects:
        Writes ``CRITIC权重稳定性分析.png`` to the working directory and calls
        ``plt.show()``. Failures of either step are printed, not raised.
    """
    # Fonts able to render the Chinese titles and labels on each platform.
    if os.name == 'nt':  # Windows
        title_font = {'family': 'SimHei', 'size': 16}
        label_font = {'family': 'Microsoft YaHei', 'size': 14}
    else:  # macOS / Linux
        title_font = {'family': 'WenQuanYi Micro Hei', 'size': 16}
        label_font = {'family': 'WenQuanYi Zen Hei', 'size': 14}

    noise_levels = results['noise_levels']
    original_weights = np.asarray(results['original_weights'], dtype=float)
    columns = _indicator_labels(results)
    n_levels = len(noise_levels)
    n_columns = len(columns)
    level_labels = [f'{level * 100:.0f}%' for level in noise_levels]

    # One colour per noise level; line styles and markers per key indicator.
    color_palette = plt.cm.viridis(np.linspace(0, 1, n_levels))
    linestyles = ['-', '--', '-.', ':']
    markers = ['o', 's', 'D', '^', 'v', '<', '>', 'p', '*', 'h']

    plt.figure(figsize=(15, 10))

    # ---- Panel 1: relative weight change per indicator and noise level ----
    plt.subplot(2, 2, 1)
    box_offset = 0.2  # horizontal shift between the boxes of one indicator
    for i, noise_level in enumerate(noise_levels):
        weight_matrix = results['weight_matrix'][noise_level]
        weight_changes = [
            _percent_change(weight_matrix[:, j], original_weights[j])
            for j in range(weight_matrix.shape[1])
        ]
        plt.boxplot(weight_changes, positions=np.arange(n_columns) + i * box_offset,
                    widths=0.15, patch_artist=True,
                    boxprops=dict(facecolor=color_palette[i], alpha=0.7),
                    medianprops=dict(color='black', linewidth=1.5))
    plt.axhline(y=0, color='r', linestyle='--')
    # Put each tick in the middle of its group of boxes.
    plt.xticks(np.arange(n_columns) + (n_levels - 1) * box_offset / 2, columns,
               rotation=45, ha='right', fontproperties=label_font)
    plt.xlabel('指标', fontproperties=label_font)
    plt.ylabel('权重变化百分比 (%)', fontproperties=label_font)
    plt.title('不同噪声水平下的权重变化', fontproperties=title_font)
    # boxplot() creates no legend entries, so build them by hand.
    legend_handles = [plt.Rectangle((0, 0), 1, 1, color=color_palette[i], alpha=0.7)
                      for i in range(n_levels)]
    legend_labels = [f'{level * 100:.0f}%噪声' for level in noise_levels]
    plt.legend(legend_handles, legend_labels, title='噪声水平',
               prop=label_font, loc='upper right')

    # ---- Panel 2: coefficient-of-variation heat map ----
    plt.subplot(2, 2, 2)
    cv_matrix = np.array([
        results['stability_stats'][noise_level]['变异系数'].values
        for noise_level in noise_levels
    ])
    ax = sns.heatmap(cv_matrix, annot=True, fmt=".3f", cmap="coolwarm",
                     xticklabels=columns, yticklabels=level_labels)
    plt.setp(ax.get_xticklabels(), fontproperties=label_font)
    plt.setp(ax.get_yticklabels(), fontproperties=label_font)
    plt.xlabel('指标', fontproperties=label_font)
    plt.ylabel('噪声水平', fontproperties=label_font)
    plt.title('变异系数热力图', fontproperties=title_font)

    # ---- Panel 3: stability of the weight ranking ----
    plt.subplot(2, 2, 3)
    # Ranks of the original weights do not change between replications.
    orig_ranks = np.argsort(np.argsort(original_weights))
    rank_stability = []
    for noise_level in noise_levels:
        weight_matrix = results['weight_matrix'][noise_level]
        rank_stability.append([
            np.corrcoef(np.argsort(np.argsort(row)), orig_ranks)[0, 1]
            for row in weight_matrix
        ])
    box = plt.boxplot(rank_stability, patch_artist=True)
    for patch, color in zip(box['boxes'], color_palette):
        patch.set_facecolor(color)
    plt.xticks(range(1, n_levels + 1), level_labels)
    plt.axhline(y=1, color='g', linestyle='--', alpha=0.7)
    plt.axhline(y=0.9, color='orange', linestyle='--', alpha=0.7)
    plt.title('权重排序稳定性', fontproperties=title_font)
    plt.xlabel('噪声水平', fontproperties=label_font)
    plt.ylabel('排名相关系数', fontproperties=label_font)

    # ---- Panel 4: mean absolute change of the three largest weights ----
    plt.subplot(2, 2, 4)
    top_indices = np.argsort(original_weights)[-3:]
    for j, col_index in enumerate(top_indices):
        mean_changes = []
        for i, noise_level in enumerate(noise_levels):
            weight_matrix = results['weight_matrix'][noise_level]
            changes = _percent_change(weight_matrix[:, col_index],
                                      original_weights[col_index])
            mean_abs_change = np.mean(np.abs(changes))
            mean_changes.append(mean_abs_change)
            # Marker identifies the indicator, colour the noise level.
            plt.scatter(noise_level, mean_abs_change,
                        color=color_palette[i],
                        marker=markers[j % len(markers)],
                        s=80, alpha=0.7)
        plt.plot(noise_levels, mean_changes,
                 linestyle=linestyles[j % len(linestyles)],
                 color=color_palette[j % len(color_palette)],
                 linewidth=2,
                 label=f'{columns[col_index]}')
    plt.title('关键指标权重变化', fontproperties=title_font)
    plt.xlabel('噪声水平', fontproperties=label_font)
    plt.ylabel('平均绝对变化百分比 (%)', fontproperties=label_font)
    plt.legend(prop=label_font)
    plt.grid(alpha=0.3)

    plt.tight_layout()
    try:
        plt.savefig('CRITIC权重稳定性分析.png', dpi=300, bbox_inches='tight')
        print("可视化图表已保存为 'CRITIC权重稳定性分析.png'")
    except Exception as e:
        print(f"保存图片时出错: {e}")
    try:
        plt.show()
    except Exception as e:
        print(f"显示图表时出错: {e}")
# ... 之后的代码保持不变 ...
def _grade_cv(cv, grades):
    """Map a coefficient of variation to one of four labels.

    ``grades`` holds the labels for cv < 0.1, cv < 0.2, cv < 0.3 and
    everything else (including NaN), in that order.
    """
    for limit, grade in zip((0.1, 0.2, 0.3), grades):
        if cv < limit:
            return grade
    return grades[-1]


def save_stability_report(results, info):
    """Write the stability analysis as a text report and echo it to stdout.

    Args:
        results: dict with ``noise_levels``, ``original_weights`` and
            ``stability_stats`` (noise level -> DataFrame with the columns
            ``指标``, ``原始权重``, ``平均权重``, ``标准差``, ``变异系数``).
        info: dict with ``columns``, ``std_dev``, ``conflict`` and
            ``information`` describing the original weight computation.

    Side effects:
        Writes ``CRITIC_稳定性分析报告.txt`` (UTF-8, falling back to the system
        default encoding) to the working directory and prints the report.
        A failed write is reported on stdout and does not raise.
    """
    original_weights = results['original_weights']
    noise_levels = results['noise_levels']
    parts = ["CRITIC权重稳定性分析报告\n", "=" * 80 + "\n\n"]

    # 1. Original weights with the quantities they were derived from.
    parts.append("原始权重信息:\n")
    parts.append("-" * 50 + "\n")
    for i, col in enumerate(info['columns']):
        parts.append(
            f"{col}: {original_weights[i]:.6f} "
            f"(标准差: {info['std_dev'][i]:.4f}, "
            f"冲突性: {info['conflict'][i]:.4f}, "
            f"信息量: {info['information'][i]:.4f})\n"
        )

    # 2. Per-noise-level statistics table.
    parts.append("\n\n稳定性分析结果:\n")
    parts.append("=" * 50 + "\n")
    for noise_level in noise_levels:
        stats_df = results['stability_stats'][noise_level]
        parts.append(f"\n噪声水平: {noise_level * 100:.1f}%\n")
        parts.append("-" * 40 + "\n")
        parts.append(f"{'指标':<20}{'原始权重':>12}{'平均权重':>12}{'标准差':>10}{'变异系数':>12}\n")
        rows = zip(stats_df['指标'], stats_df['原始权重'], stats_df['平均权重'],
                   stats_df['标准差'], stats_df['变异系数'])
        for name, orig_w, mean_w, std_w, cv in rows:
            parts.append(f"{name:<20}{orig_w:>12.6f}{mean_w:>12.6f}"
                         f"{std_w:>10.6f}{cv:>12.4f}\n")

    # 3. Overall stability grade per noise level (driven by the worst CV).
    parts.append("\n\n稳定性评估:\n")
    parts.append("=" * 50 + "\n")
    overall_grades = ("极高稳定性", "高稳定性", "中等稳定性", "低稳定性")
    for noise_level in noise_levels:
        cv_column = results['stability_stats'][noise_level]['变异系数']
        avg_cv = cv_column.mean()
        max_cv = cv_column.max()
        stability_level = _grade_cv(max_cv, overall_grades)
        parts.append(f"噪声水平 {noise_level * 100:.1f}%: "
                     f"平均变异系数={avg_cv:.4f}, 最大变异系数={max_cv:.4f} → {stability_level}\n")

    # 4. Stability of the three indicators with the largest original weight.
    parts.append("\n关键指标稳定性分析:\n")
    parts.append("-" * 50 + "\n")
    indicator_grades = ("稳定性优异", "稳定性良好", "稳定性一般", "稳定性较差")
    for col_index in np.argsort(original_weights)[-3:]:
        col = info['columns'][col_index]
        # Rows of the statistics tables follow the column order, so the
        # position identifies the indicator even if names are duplicated.
        cv_values = [results['stability_stats'][noise_level]['变异系数'].iloc[col_index]
                     for noise_level in noise_levels]
        avg_cv = np.mean(cv_values)
        parts.append(f"指标 '{col}' (权重={original_weights[col_index]:.4f}): "
                     f"平均变异系数={avg_cv:.4f} → "
                     f"{_grade_cv(avg_cv, indicator_grades)}\n")

    report = "".join(parts)

    # Persist the report; never let a failed write abort the analysis.
    try:
        with open('CRITIC_稳定性分析报告.txt', 'w', encoding='utf-8') as f:
            f.write(report)
        print("稳定性分析报告已保存为 'CRITIC_稳定性分析报告.txt' (UTF-8编码)")
    except (OSError, UnicodeError):
        try:
            with open('CRITIC_稳定性分析报告.txt', 'w') as f:
                f.write(report)
            print("稳定性分析报告已保存为 'CRITIC_稳定性分析报告.txt' (系统默认编码)")
        except (OSError, UnicodeError) as e:
            print(f"保存报告时出错: {e}")

    print("\n" + "=" * 80)
    print("CRITIC权重稳定性分析报告(控制台输出)")
    print("=" * 80)
    print(report)
def load_custom_data(file_path):
    """Load a CSV file, trying several text encodings.

    The encodings are tried in this order:

    1. ``utf-8`` and ``gbk``, which reject byte sequences they cannot decode;
    2. the encoding guessed by ``chardet``, if that package is installed;
    3. ``latin1`` as a last resort. It accepts every byte sequence, so it
       must come last; otherwise the detection step can never run.

    Args:
        file_path: Path of the CSV file.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if the file could not be
        read (the error is printed, not raised).
    """
    try:
        # Encodings that fail loudly on foreign input.
        for encoding in ('utf-8', 'gbk'):
            try:
                data = pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                continue
            print(f"使用 {encoding} 编码成功加载数据")
            return data

        # Optional automatic detection.
        try:
            import chardet
        except ImportError:
            chardet = None
        if chardet is not None:
            with open(file_path, 'rb') as f:
                detected = chardet.detect(f.read()).get('encoding')
            if detected:
                try:
                    data = pd.read_csv(file_path, encoding=detected)
                except (ValueError, LookupError):
                    # Wrong or unknown guess (UnicodeError and pandas'
                    # ParserError are ValueErrors): fall through to latin1.
                    pass
                else:
                    print(f"使用自动检测的 {detected} 编码成功加载数据")
                    return data

        # Last resort: latin1 maps every byte to a character.
        encoding = 'latin1'
        data = pd.read_csv(file_path, encoding=encoding)
        print(f"使用 {encoding} 编码成功加载数据")
        return data
    except Exception as e:
        # Top-level boundary of the loader: report and signal failure.
        print(f"加载文件时出错: {e}")
        return None
def validate_data(data):
    """Check that a DataFrame is usable for the CRITIC analysis.

    Prints a short validation log. Missing values and a small sample size
    (< 10 rows) only produce warnings; non-numeric columns, fewer than two
    columns or fewer than two rows make the validation fail.

    Args:
        data: The loaded ``pandas.DataFrame`` or ``None``.

    Returns:
        ``True`` if the data can be analysed, ``False`` otherwise.
    """
    if data is None:
        return False

    n_rows, n_cols = data.shape
    print("\n数据验证:")
    print(f"- 数据维度: {n_rows}行 × {n_cols}列")

    # Missing values are tolerated; they are mean-filled downstream.
    missing_values = data.isnull().sum().sum()
    if missing_values > 0:
        print(f"- 警告: 发现 {missing_values} 个缺失值,将使用列均值填充")

    # Every column must be numeric.
    non_numeric_cols = data.select_dtypes(exclude=['number']).columns.tolist()
    if non_numeric_cols:
        print(f"- 错误: 发现非数值列: {non_numeric_cols}")
        print(" 请确保所有列都是数值类型")
        return False

    # Correlations between indicators need at least two columns.
    if n_cols < 2:
        print("- 错误: 数据至少需要2列指标")
        return False

    # The sample standard deviation (ddof=1) is undefined for fewer than
    # two rows, so such data cannot yield weights at all.
    if n_rows < 2:
        print("- 错误: 数据至少需要2行样本")
        return False

    if n_rows < 10:
        print("- 警告: 数据行数较少 (<10),结果可能不可靠")
    return True
# Script entry point: load the CSV, validate it, compute the CRITIC weights
# and run the noise-based stability analysis.
if __name__ == "__main__":
    print("=" * 80)
    print("CRITIC权重稳定性分析程序")
    print("=" * 80)

    # 1. Load the user's CSV data.
    data_file = "xunlian.csv"  # replace with the path of your CSV file
    custom_data = load_custom_data(data_file)
    if custom_data is None:
        print("无法加载数据,程序终止")
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and may be missing in some environments.
        sys.exit(1)

    # 2. Validate the data quality.
    if not validate_data(custom_data):
        print("数据验证失败,请修正数据后重试")
        sys.exit(1)
    print("\n数据前5行预览:")
    print(custom_data.head())

    # 3. Declare the indicator types.
    # Target-type indicators as {column name: target value}.
    target_spec = {
        "凝结水过冷度": 0,  # the closer to 0 the better
        "炉膛负压": -100  # the closer to -100 the better
    }
    # Cost-type indicators: smaller values are better.
    cost_indicators = [
        "低压缸排汽压力",
        "发电机定子绕组温度",
        "SO2",
        "NOx",
        "烟尘",
        "供电煤耗率",
        "厂用电率",
        "Co2"
    ]

    # 4. Warn about configured columns that are missing from the data; such
    #    names are ignored by the weight computation.
    all_columns = custom_data.columns.tolist()
    missing_targets = [col for col in target_spec if col not in all_columns]
    missing_costs = [col for col in cost_indicators if col not in all_columns]
    if missing_targets:
        print(f"警告: 以下靶心型指标在数据中不存在: {missing_targets}")
    if missing_costs:
        print(f"警告: 以下成本型指标在数据中不存在: {missing_costs}")

    # 5. Compute the original weights.
    print("\n计算CRITIC权重中...")
    weights, info = enhanced_critic_weights(
        custom_data,
        target_indicators=target_spec,
        cost_indicators=cost_indicators
    )
    print("\n原始权重分配:")
    for col, weight in zip(info["columns"], weights):
        print(f"{col}: {weight:.6f}")

    # 6. Run the stability analysis.
    print("\n进行稳定性分析...")
    stability_results = stability_analysis_with_noise(
        custom_data,
        critic_func=enhanced_critic_weights,
        noise_levels=[0.01, 0.03, 0.05],  # 1%, 3% and 5% noise
        n_iter=50,  # replications per noise level (adjust as needed)
        target_indicators=target_spec,
        cost_indicators=cost_indicators
    )
    print("\n分析完成!")
    print("结果已保存到:")
    print("- CRITIC权重稳定性分析.png")
    print("- CRITIC_稳定性分析报告.txt")
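# TODO: change the plotting so that only two figures are produced, each saved
# as its own separate image: the box plot of weight changes under the
# different noise levels, and the coefficient-of-variation heat map.
# (Original request: 我要修改代码让它只绘制不同噪声水平下的权重变化箱线图和变异系数热力图,并且这两张图要分开)
# (Web-page residue from the paste: "最新发布")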