这里是为什么错了:Traceback (most recent call last):
File "D:\Users\lenovo\Desktop\新的代码.py", line 302, in <module>
intermediate, total_output, sectors = load_io_data(io_file)
File "D:\Users\lenovo\Desktop\新的代码.py", line 31, in load_io_data
raise ValueError(f"部门数量不匹配!期望153个部门,实际得到{len(sectors)}个部门,矩阵形状{intermediate.shape}")
ValueError: 部门数量不匹配!期望153个部门,实际得到153个部门,矩阵形状(153, 163)
我的代码现在是这样:import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from openpyxl import load_workbook
# 设置中文显示
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# ====================
# 1. 数据加载与预处理(修改版)
# ====================
def load_io_data(file_path):
"""加载投入产出表Excel数据"""
wb = load_workbook(file_path)
sheet = wb['2020年全国投入产出表'] # 假设投入产出表在Sheet1
# 读取中间投入矩阵(现在是153×153部门)
intermediate = np.array([[cell.value for cell in row]
for row in sheet['F7:FL159']]) # 修改范围为B7:FD159
# 读取总产出向量(153个部门)
total_output = np.array([cell.value for row in sheet['FM7:FM159']
for cell in row])
# 创建部门名称列表(153个部门)
sectors = [cell.value for row in sheet['B7:B159'] for cell in row]
if len(sectors) != 153 or intermediate.shape != (153, 153):
raise ValueError(f"部门数量不匹配!期望153个部门,实际得到{len(sectors)}个部门,矩阵形状{intermediate.shape}")
# 关键修改:只处理NaN值,保留真实零值
# 将NaN替换为0,但保留原有的0值
intermediate = np.nan_to_num(intermediate, nan=0.0)
# 确保total_output中的NaN也处理
total_output = np.nan_to_num(total_output, nan=0.0)
print(f"✓ 成功加载153×153投入产出矩阵")
print(f"✓ 总元素数: {intermediate.size:,}")
print(f"✓ 零值数量: {np.count_nonzero(intermediate == 0):,}")
print(f"✓ NaN值已替换为0")
return intermediate, total_output, sectors
# ====================
# 2. 核心计算函数(修改版)
# ====================
def calc_coefficients(intermediate, total_output):
"""计算各类系数矩阵 - 修正零值处理"""
results = {}
# 1. 直接消耗系数矩阵 (A)
# 关键修改:安全处理零产出部门
safe_total_output = np.where(total_output > 0, total_output, 1.0) # 避免除以零
diag_inv = np.diag(1 / safe_total_output)
A_matrix = intermediate @ diag_inv
# 特殊处理:对于总产出为零的部门,其直接消耗系数设为0
zero_output_mask = total_output == 0
A_matrix[zero_output_mask, :] = 0
results['direct_consumption'] = A_matrix
# 2. 分配系数矩阵 (D)
intermediate_trans = intermediate.T
D_matrix = intermediate_trans @ diag_inv
# 同样处理零产出情况
D_matrix[:, zero_output_mask] = 0
results['distribution'] = D_matrix
# 3. 中间投入率
row_sums = np.sum(intermediate, axis=1)
# 安全除法
results['intermediate_input_rate'] = np.where(
total_output > 0,
row_sums / total_output,
0 # 零产出部门的投入率为0
)
# 4. 中间需求率
col_sums = np.sum(intermediate, axis=0)
results['intermediate_demand_rate'] = np.where(
total_output > 0,
col_sums / total_output,
0
)
# 5. 感应度系数 (灵敏度系数)
A = results['direct_consumption']
I = np.identity(A.shape[0])
try:
# 检查矩阵是否可逆
if np.linalg.det(I - A) == 0:
print("⚠️ 技术系数矩阵不可逆,使用伪逆")
leontief_inv = np.linalg.pinv(I - A)
else:
leontief_inv = np.linalg.inv(I - A)
row_sums = np.sum(leontief_inv, axis=1)
results['sensitivity_coeff'] = row_sums / np.mean(row_sums)
except np.linalg.LinAlgError as e:
print(f"❌ 矩阵求逆失败: {e}")
print("使用替代方法计算感应度系数...")
# 使用近似方法
results['sensitivity_coeff'] = np.ones(len(total_output))
# 添加部门状态信息
results['sector_status'] = {
'active_count': np.count_nonzero(total_output > 0),
'zero_output_count': np.count_nonzero(total_output == 0),
'zero_output_indices': np.where(total_output == 0)[0].tolist()
}
return results
# ====================
# 3. 可视化函数(增强版)
# ====================
def visualize_results(results, sectors, top_n=10):
"""生成可视化图表 - 包含零值部门信息"""
fig = plt.figure(figsize=(20, 15), dpi=300)
# 获取结果
direct_consumption = results['direct_consumption']
sensitivity_coeff = results['sensitivity_coeff']
input_rate = results['intermediate_input_rate']
demand_rate = results['intermediate_demand_rate']
# 显示零产出部门信息
zero_output_info = results['sector_status']
print(f"\n📊 部门状态分析:")
print(f" 活跃部门: {zero_output_info['active_count']}")
print(f" 零产出部门: {zero_output_info['zero_output_count']}")
if zero_output_info['zero_output_indices']:
zero_names = [sectors[i] for i in zero_output_info['zero_output_indices']]
print(f" 零产出部门: {zero_names[:5]}{'...' if len(zero_names) > 5 else ''}")
# 1. 直接消耗系数热力图(前10部门)
plt.subplot(3, 3, 1)
top_indices = np.argsort(-input_rate)[:top_n]
top_sectors = [sectors[i] for i in top_indices]
sns.heatmap(direct_consumption[top_indices][:, top_indices],
annot=True, fmt=".3f", cmap="YlGnBu",
xticklabels=top_sectors, yticklabels=top_sectors)
plt.title(f'前{top_n}部门直接消耗系数矩阵', fontsize=12)
plt.xticks(rotation=45)
# 2. 感应度系数柱状图
plt.subplot(3, 3, 2)
sorted_idx = np.argsort(-sensitivity_coeff)
colors = ['red' if i in zero_output_info['zero_output_indices'] else 'blue'
for i in sorted_idx[:top_n]]
bars = plt.barh([sectors[i] for i in sorted_idx[:top_n]],
sensitivity_coeff[sorted_idx[:top_n]],
color=colors, alpha=0.7)
plt.axvline(1, color='r', linestyle='--', linewidth=1)
# 添加图例
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor='blue', label='活跃部门'),
Patch(facecolor='red', label='零产出部门')]
plt.legend(handles=legend_elements, loc='lower right')
plt.title('部门感应度系数排名', fontsize=12)
plt.xlabel('系数值')
plt.tight_layout()
# 3. 中间投入率/需求率对比
plt.subplot(3, 3, 3)
scatter_colors = ['red' if total_output[i] == 0 else 'blue'
for i in range(len(sectors))]
plt.scatter(input_rate, demand_rate, s=50, c=scatter_colors, alpha=0.6)
plt.axhline(0.5, color='gray', linestyle='--', alpha=0.5)
plt.axvline(0.5, color='gray', linestyle='--', alpha=0.5)
# 添加图例
legend_elements = [plt.Line2D([0], [0], marker='o', color='w',
markerfacecolor='blue', markersize=8, label='活跃部门'),
plt.Line2D([0], [0], marker='o', color='w',
markerfacecolor='red', markersize=8, label='零产出部门')]
plt.legend(handles=legend_elements, loc='best')
plt.title('中间投入率 vs 中间需求率\n(红色=零产出部门)', fontsize=12)
plt.xlabel('中间投入率')
plt.ylabel('中间需求率')
# 4. 分配系数分布
plt.subplot(3, 3, 4)
non_zero_dist = results['distribution'][results['distribution'] > 0]
sns.histplot(non_zero_dist.flatten(), bins=30, kde=True, alpha=0.7)
plt.title('非零分配系数分布', fontsize=12)
plt.xlabel('分配系数值')
# 5. 部门状态饼图
plt.subplot(3, 3, 5)
sizes = [zero_output_info['active_count'], zero_output_info['zero_output_count']]
labels = [f'活跃部门 ({sizes[0]})', f'零产出部门 ({sizes[1]})']
colors = ['#66b3ff', '#ff9999']
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
plt.title('部门状态分布', fontsize=12)
# 6. 感应度系数分布
plt.subplot(3, 3, 6)
sns.boxplot(y=sensitivity_coeff)
plt.title('感应度系数分布', fontsize=12)
plt.ylabel('感应度系数')
# 7. 零值交易热力图(采样)
plt.subplot(3, 3, 7)
sample_size = min(30, intermediate.shape[0])
sample_indices = np.random.choice(153, sample_size, replace=False)
sample_indices.sort()
sample_sectors = [sectors[i] for i in sample_indices]
zero_heatmap = (intermediate[sample_indices][:, sample_indices] == 0).astype(int)
sns.heatmap(zero_heatmap, cmap='binary', xticklabels=sample_sectors,
yticklabels=sample_sectors, cbar_kws={'label': '0=有交易, 1=零交易'})
plt.title(f'随机采样{sample_size}×{sample_size}零值交易分布', fontsize=12)
plt.xticks(rotation=45)
# 8. 总产出分布
plt.subplot(3, 3, 8)
active_outputs = total_output[total_output > 0]
if len(active_outputs) > 0:
sns.histplot(active_outputs, bins=20, kde=True)
plt.title('活跃部门总产出分布', fontsize=12)
plt.xlabel('总产出')
else:
plt.text(0.5, 0.5, '无活跃部门数据', ha='center', va='center')
plt.title('总产出分布', fontsize=12)
plt.tight_layout()
plt.savefig('io_analysis_full.png', bbox_inches='tight', dpi=300)
return fig
# ====================
# 4. 数据质量检查函数(新增)
# ====================
def check_data_quality(intermediate, total_output, sectors):
"""检查数据质量并提供详细报告"""
print("="*60)
print("📊 投入产出数据质量检查报告")
print("="*60)
# 基本信息
print(f"矩阵形状: {intermediate.shape}")
print(f"部门数量: {len(sectors)}")
# 零值分析
total_elements = intermediate.size
zero_transactions = np.count_nonzero(intermediate == 0)
zero_ratio = zero_transactions / total_elements
print(f"\n🔢 零值分析:")
print(f" 总交易数: {total_elements:,}")
print(f" 零值交易: {zero_transactions:,} ({zero_ratio:.1%})")
print(f" 非零交易: {total_elements - zero_transactions:,}")
# 部门活动分析
zero_output_count = np.count_nonzero(total_output == 0)
active_count = len(total_output) - zero_output_count
print(f"\n🏭 部门活动分析:")
print(f" 活跃部门: {active_count}")
print(f" 零产出部门: {zero_output_count}")
if zero_output_count > 0:
zero_indices = np.where(total_output == 0)[0]
zero_names = [f"{sectors[i]}(索引{i})" for i in zero_indices[:10]]
print(f" 零产出部门: {zero_names}")
if len(zero_names) < zero_output_count:
print(f" ... 还有{zero_output_count-10}个")
# 负值检查
negative_count = np.count_nonzero(intermediate < 0)
if negative_count > 0:
print(f"\n⚠️ 发现 {negative_count} 个负值!请检查数据合理性")
# NaN值检查
nan_count = np.count_nonzero(np.isnan(intermediate)) + np.count_nonzero(np.isnan(total_output))
if nan_count > 0:
print(f"\n❌ 发现 {nan_count} 个NaN值!")
else:
print(f"\n✅ 无NaN值")
print("="*60)
# ====================
# 5. 主程序(修改版)
# ====================
if __name__ == "__main__":
# 数据路径
io_file = "2020.xlsx"
try:
# 加载真实数据
intermediate, total_output, sectors = load_io_data(io_file)
except FileNotFoundError:
print(f"⚠️ 文件 {io_file} 未找到,使用模拟数据演示")
# 模拟153×153数据
np.random.seed(42)
intermediate = np.random.rand(153, 153) * 1000
# 设置一些真实零值(模拟某些部门间无交易)
zero_positions = np.random.choice(153*153, size=int(153*153*0.3), replace=False)
zero_rows, zero_cols = np.unravel_index(zero_positions, (153, 153))
intermediate[zero_rows, zero_cols] = 0
# 模拟总产出
total_output = np.sum(intermediate, axis=1) + np.random.rand(153) * 5000
# 设置一些部门总产出为零
zero_output_indices = [42, 88, 120]
for idx in zero_output_indices:
intermediate[idx, :] = 0
intermediate[:, idx] = 0
total_output[idx] = 0
sectors = [f"部门_{i+1}" for i in range(153)]
print(f"✓ 已生成模拟153×153数据用于演示")
# 数据质量检查
check_data_quality(intermediate, total_output, sectors)
# 计算所有系数
print("\n🔄 开始计算各类系数...")
results = calc_coefficients(intermediate, total_output)
# 可视化并保存
print("📊 生成可视化图表...")
fig = visualize_results(results, sectors)
# 生成详细报告
print("📝 生成分析报告...")
report_df = pd.DataFrame({
'部门': sectors,
'总产出': total_output,
'中间投入率': results['intermediate_input_rate'],
'中间需求率': results['intermediate_demand_rate'],
'感应度系数': results['sensitivity_coeff']
})
# 添加状态列
report_df['状态'] = report_df.index.map(
lambda x: '零产出' if total_output[x] == 0 else '活跃'
)
report_df.to_excel("io_analysis_report_detailed.xlsx", index=False)
# 生成零产出部门专项报告
zero_output_mask = total_output == 0
if np.any(zero_output_mask):
zero_report = report_df[zero_output_mask].copy()
zero_report.to_excel("zero_output_sectors_report.xlsx", index=False)
print(f"📌 零产出部门报告已保存: zero_output_sectors_report.xlsx")
print("\n🎉 计算完成!")
print("📊 图表已保存为: io_analysis_full.png")
print("📋 详细报告已保存为: io_analysis_report_detailed.xlsx")
# 显示关键统计
print(f"\n📈 关键指标:")
print(f" 平均感应度系数: {results['sensitivity_coeff'].mean():.3f}")
print(f" 最高感应度: {results['sensitivity_coeff'].max():.3f}")
print(f" 最低感应度: {results['sensitivity_coeff'].min():.3f}")
最新发布