import os
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
from pathlib import Path
import matplotlib as mpl
import matplotlib.font_manager as fm
# ================== Path configuration ==================
# DATA_DIR holds the input workbook; OUTPUT_DIR receives the rendered figure.
DATA_DIR = Path(r"D:/万泉河数据/斯皮尔曼相关分析/斯皮尔曼相关分析")
OUTPUT_DIR = Path(r"D:\万泉河数据\斯皮尔曼相关分析")

# Create either directory (including missing parents) if it does not exist yet.
for directory in (DATA_DIR, OUTPUT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# Full paths of the workbook to read and the image to write.
excel_path = DATA_DIR.joinpath("万泉河土地利用类型以及浓度,斯皮尔曼相关分析.xlsx")
output_image = OUTPUT_DIR.joinpath("土地利用与污染物相关性分析.png")
# ================== Data loading ==================
# Read the worksheet named 'Sheet1' from the workbook into `df`.
# On any read failure a message is printed and the script terminates with
# exit status 1. SystemExit is raised directly because the `exit()` helper is
# injected by the `site` module for interactive use and is not guaranteed to
# exist in every interpreter configuration.
try:
    # Keep the try body to the single call that can fail, so that an error in
    # the diagnostic prints below is not misreported as a file-read error.
    df = pd.read_excel(excel_path, sheet_name='Sheet1')
except FileNotFoundError:
    print(f"❌ 文件不存在: {excel_path}")
    raise SystemExit(1)
except Exception as e:
    print(f"❌ 文件读取错误: {e}")
    raise SystemExit(1)
else:
    print(f"✅ 成功读取数据文件: {excel_path}")
    # Print the column names to help diagnose header mismatches.
    print("数据框列名:")
    print(df.columns.tolist())
# ================== Statistical analysis ==================
# Logical column names the analysis needs (X first, then Y).
required_columns = ["水占比(%)", "污染物浓度"]
# Column labels actually present in the loaded worksheet.
available_columns = df.columns.to_list()
# Find the column name closest to a requested one.
def find_closest_column(target, columns, cutoff=0.6):
    """Return the entry of *columns* most similar to *target*, or ``None``.

    Similarity is computed by ``difflib.get_close_matches``.

    Args:
        target: The column name being looked for.
        columns: Iterable of candidate column labels. Labels that are not
            strings (for example numeric headers) are ignored, because
            ``difflib`` can only compare sequences and would raise
            ``TypeError`` on them.
        cutoff: Minimum similarity ratio in ``[0, 1]`` a candidate must reach
            to be accepted. Defaults to 0.6.

    Returns:
        The best-matching string label, or ``None`` when no candidate reaches
        *cutoff* (including when *columns* is empty).
    """
    from difflib import get_close_matches

    candidates = [label for label in columns if isinstance(label, str)]
    matches = get_close_matches(target, candidates, n=1, cutoff=cutoff)
    return matches[0] if matches else None
# Resolve every required column name to a column that exists in the sheet:
# an exact match is used as-is, otherwise the closest name is substituted
# (with a warning). If nothing similar exists the script stops with status 1.
matched_columns = {}
for col in required_columns:
    if col in available_columns:
        matched_columns[col] = col
        continue
    closest = find_closest_column(col, available_columns)
    if not closest:
        print(f"❌ 未找到列 '{col}',且无相似列名")
        raise SystemExit(1)
    print(f"⚠️ 未找到列 '{col}',使用最接近的列 '{closest}' 替代")
    matched_columns[col] = closest

column_x = matched_columns["水占比(%)"]
column_y = matched_columns["污染物浓度"]
print(f"使用列: X轴 = '{column_x}', Y轴 = '{column_y}'")

# Fuzzy matching could map both names onto the same column; correlating a
# column with itself is meaningless, so stop instead of reporting rho = 1.
if column_x == column_y:
    print(f"❌ X轴与Y轴匹配到了同一列 '{column_x}',无法进行相关分析")
    raise SystemExit(1)

# Coerce both columns to numbers and drop rows where either value is missing
# or non-numeric. With its default NaN policy spearmanr would otherwise return
# NaN, and a later least-squares fit on the same data would fail as well.
paired = df[[column_x, column_y]].apply(pd.to_numeric, errors="coerce").dropna()
dropped_rows = len(df) - len(paired)
if dropped_rows:
    print(f"⚠️ 已剔除 {dropped_rows} 行缺失或非数值数据")
# With fewer than three pairs a rank correlation is trivially +/-1 or undefined.
if len(paired) < 3:
    print(f"❌ 有效数据不足 (仅 {len(paired)} 行),无法计算斯皮尔曼相关系数")
    raise SystemExit(1)
x = paired[column_x]
y = paired[column_y]

# Spearman rank correlation coefficient and its p-value.
rho, p_value = spearmanr(x, y)
print(f"斯皮尔曼相关系数: ρ = {rho:.4f}, p值 = {p_value:.4e}")
# ================== Visualization ==================
# Colour palette used throughout the figure.
point_color = '#3498db'       # blue scatter markers
trendline_color = '#e74c3c'   # red trend line
background_color = '#f8f9fa'  # light grey background

fig, ax = plt.subplots(figsize=(8, 6), dpi=120)

# Paint both the figure canvas and the plotting area with the background colour.
for surface in (fig.patch, ax):
    surface.set_facecolor(background_color)

# Scatter plot of the paired observations; zorder keeps the markers above
# anything drawn with a lower zorder.
scatter_style = dict(
    s=100,
    c=point_color,
    edgecolor='white',
    linewidth=1.2,
    alpha=0.85,
    zorder=3,
)
scatter = ax.scatter(x, y, **scatter_style)
# Map the p-value to a significance marker. Thresholds are checked from the
# strictest upwards; anything at or above 0.05 is 'ns' (not significant).
significance = 'ns'
for threshold, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
    if p_value < threshold:
        significance = marker
        break
# Font configuration for Chinese text. 'Microsoft YaHei' is kept as the first
# choice; the remaining entries are fallbacks so the figure still renders
# Chinese labels on systems where that font is not installed.
plt.rcParams['font.sans-serif'] = [
    'Microsoft YaHei',
    'SimHei',
    'PingFang SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'DejaVu Sans',
]
# Render the minus sign as an ASCII hyphen, which every font above provides.
plt.rcParams['axes.unicode_minus'] = False

# Statistics box in the upper-right corner of the axes. The significance
# marker is appended to the coefficient; it was previously computed but never
# displayed.
textstr = f"Spearman ρ = {rho:.2f} {significance}\np = {p_value:.3e}"
props = dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.8, edgecolor='#bdc3c7')
ax.text(
    0.95, 0.95, textstr,
    transform=ax.transAxes,  # position in axes-fraction coordinates
    fontsize=11,
    verticalalignment='top',
    horizontalalignment='right',
    bbox=props,
)
# Trend line: a degree-1 least-squares fit drawn across the observed x range.
# The fit uses only pairs where both values are finite numbers, because
# np.polyfit cannot handle NaN/inf and builtin min()/max() give unreliable
# results when NaN is present.
x_values = pd.to_numeric(x, errors="coerce").to_numpy(dtype=float)
y_values = pd.to_numeric(y, errors="coerce").to_numpy(dtype=float)
finite_pairs = np.isfinite(x_values) & np.isfinite(y_values)
x_fit = x_values[finite_pairs]
y_fit = y_values[finite_pairs]

# A straight line needs at least two distinct x positions to be determined.
if np.unique(x_fit).size >= 2:
    z = np.polyfit(x_fit, y_fit, 1)
    p = np.poly1d(z)
    x_range = np.linspace(x_fit.min(), x_fit.max(), 100)
    ax.plot(
        x_range,
        p(x_range),
        color=trendline_color,
        linewidth=2.5,
        linestyle='-',
        zorder=2,
    )
else:
    print("⚠️ 有效数据点不足或X值全部相同,跳过趋势线绘制")
# Axis labels default to the names of the plotted columns.
ax.set_xlabel(column_x, fontsize=12)
ax.set_ylabel(column_y, fontsize=12)

# Give all four borders the same grey colour and width.
for side in ('top', 'right', 'bottom', 'left'):
    border = ax.spines[side]
    border.set_color('#95a5a6')
    border.set_linewidth(1.2)

# Faint dashed grid and slightly larger major tick labels.
ax.grid(True, linestyle='--', alpha=0.3, color='#bdc3c7')
ax.tick_params(axis='both', which='major', labelsize=11)
# Save the figure to disk, confirm the file exists, then display it.
plt.tight_layout()
save_options = dict(
    dpi=600,
    bbox_inches='tight',
    pad_inches=0.2,
    facecolor=background_color,
)
try:
    plt.savefig(output_image, **save_options)
    print(f"✅ 图表已保存至: {output_image}")
    # Verify the file on disk and report its size in KB.
    if not output_image.exists():
        print(f"❌ 文件保存失败: {output_image}")
    else:
        file_size = output_image.stat().st_size / 1024  # KB
        print(f"✅ 文件验证成功: {output_image} (大小: {file_size:.2f} KB)")
except Exception as e:
    # A failed save is reported but does not prevent the figure being shown.
    print(f"❌ 保存图片时出错: {e}")
plt.show()
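# TODO: show only a single "0" tick label where the x and y axes meet
# (original request: 使得横纵坐标交点处只存在一个0).
# Stray text from the source page, kept for reference: 最新发布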