import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import shapiro, normaltest, anderson, norm
# Column order of the summary table; also gives an empty result its header so
# callers can still index columns such as '是否正态' without a KeyError.
_RESULT_COLUMNS = [
    '变量', '样本量', '均值', '标准差', '偏度', '峰度',
    'Shapiro_p', 'Dagostino_p', '是否正态', '正态性判断',
]

# Fonts that contain Chinese glyphs and are commonly installed on
# Windows / macOS / Linux. matplotlib's default (DejaVu Sans) has none, which
# produces "Glyph ... missing from font(s) DejaVu Sans" warnings and empty
# boxes in the figure titles.
_CJK_FONT_CANDIDATES = [
    'Microsoft YaHei', 'SimHei', 'PingFang SC', 'Heiti TC',
    'Noto Sans CJK SC', 'WenQuanYi Micro Hei', 'Arial Unicode MS',
]

# scipy's skewness test (one half of normaltest) is not defined for n < 8.
_MIN_N_DAGOSTINO = 8


def _configure_cjk_font():
    """Make matplotlib prefer a sans-serif font that can render Chinese text."""
    fallbacks = [
        name for name in plt.rcParams['font.sans-serif']
        if name not in _CJK_FONT_CANDIDATES
    ]
    plt.rcParams['font.sans-serif'] = _CJK_FONT_CANDIDATES + fallbacks
    # Several CJK fonts lack U+2212; fall back to the ASCII hyphen for minus.
    plt.rcParams['axes.unicode_minus'] = False


def _grade(value, small_label, medium_label, large_label):
    """Map |value| to a label using the 0.5 / 1 cut-offs."""
    magnitude = abs(value)
    if magnitude < 0.5:
        return small_label
    if magnitude < 1:
        return medium_label
    return large_label


def _plot_diagnostics(clean_data, var, mean_val, std_val, skewness, kurtosis):
    """Show a histogram with a fitted normal curve next to a Q-Q plot."""
    _configure_cjk_font()
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # stat="density" puts the bars on the same y-scale as the normal PDF drawn
    # below; with the default count scale the red curve is squashed flat.
    sns.histplot(clean_data, kde=True, stat="density", ax=axes[0])
    if std_val > 0:  # a zero-variance column has no normal curve to draw
        xmin, xmax = axes[0].get_xlim()
        x = np.linspace(xmin, xmax, 100)
        axes[0].plot(x, norm.pdf(x, mean_val, std_val), 'r', linewidth=2)
    axes[0].set_title(f'{var} - 分布图\n偏度: {skewness:.3f}, 峰度: {kurtosis:.3f}')

    stats.probplot(clean_data, dist="norm", plot=axes[1])
    axes[1].set_title(f'{var} - Q-Q图')

    plt.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate


def comprehensive_normality_check(data, variables, alpha=0.05, *, show_plots=True):
    """Run a combined normality check on selected columns of a DataFrame.

    For every requested column with at least 4 non-missing values the function
    computes descriptive statistics (mean, std, skewness, excess kurtosis),
    runs the Shapiro-Wilk test and, when at least 8 observations are available,
    D'Agostino's K² test. It prints a per-variable report and optionally shows
    a histogram and a Q-Q plot.

    A variable is reported as normal only if every test that was run has
    p > alpha. With fewer than 8 observations D'Agostino's test is skipped
    (its p-value is reported as "N/A") and the verdict rests on Shapiro-Wilk.

    Args:
        data: DataFrame holding the variables.
        variables: Iterable of column names to test. Names missing from
            ``data`` and columns with fewer than 4 valid values are skipped
            with a printed notice.
        alpha: Significance level for both tests.
        show_plots: When False, no figures are created (useful for batch or
            headless runs).

    Returns:
        DataFrame with one row per tested variable and the columns listed in
        ``_RESULT_COLUMNS``. The columns are present even when no variable
        could be tested.
    """
    results = []
    for var in variables:
        if var not in data.columns:
            print(f"⚠ 跳过变量 '{var}': 数据中不存在该列")
            continue

        # Drop missing values before any statistic is computed.
        clean_data = data[var].dropna()
        n = len(clean_data)
        if n < 4:
            print(f"⚠ 跳过变量 '{var}': 有效观测值仅 {n} 个, 至少需要 4 个")
            continue

        # Descriptive statistics
        mean_val = clean_data.mean()
        std_val = clean_data.std()
        skewness = clean_data.skew()
        kurtosis = clean_data.kurtosis()

        # Shapiro-Wilk test
        _, shapiro_p = shapiro(clean_data)
        is_normal = shapiro_p > alpha

        # D'Agostino's K² test, only where scipy defines it
        if n >= _MIN_N_DAGOSTINO:
            _, dagostino_p = normaltest(clean_data)
            dagostino_text = f"{dagostino_p:.4f}"
            # Both tests must pass for the variable to count as normal.
            is_normal = is_normal and dagostino_p > alpha
        else:
            dagostino_text = "N/A"

        skew_interpretation = _grade(skewness, "对称分布", "轻度偏态", "明显偏态")
        kurtosis_interpretation = _grade(
            kurtosis, "中等峰度", "轻度尖峰/平峰", "明显尖峰/平峰"
        )

        results.append({
            '变量': var,
            '样本量': n,
            '均值': f"{mean_val:.3f}",
            '标准差': f"{std_val:.3f}",
            '偏度': f"{skewness:.3f} ({skew_interpretation})",
            '峰度': f"{kurtosis:.3f} ({kurtosis_interpretation})",
            'Shapiro_p': f"{shapiro_p:.4f}",
            'Dagostino_p': dagostino_text,
            '是否正态': "是" if is_normal else "否",
            '正态性判断': "通过" if is_normal else "未通过",
        })

        if show_plots:
            _plot_diagnostics(
                clean_data, var, mean_val, std_val, skewness, kurtosis
            )

        # Detailed per-variable report
        print(f"\n{'='*60}")
        print(f"变量 '{var}' 的正态性检验详细解读:")
        print(f"{'='*60}")
        print(f"样本量: {n}")
        print(f"均值: {mean_val:.3f}, 标准差: {std_val:.3f}")
        print(f"偏度: {skewness:.3f} - {skew_interpretation}")
        print(f"峰度: {kurtosis:.3f} - {kurtosis_interpretation}")
        print(f"Shapiro-Wilk检验 p值: {shapiro_p:.4f}")
        print(f"D'Agostino检验 p值: {dagostino_text}")
        if is_normal:
            print("✅ 结论: 数据服从正态分布")
            print(" 可以安全使用参数检验方法(如t检验、ANOVA、回归分析)")
        else:
            print("❌ 结论: 数据不服从正态分布")
            print(" 建议使用非参数检验方法或进行数据转换")
            # Point out which moment is the likely culprit.
            if abs(skewness) > 1:
                direction = "右偏" if skewness > 0 else "左偏"
                print(f" ⚠ 注意: 数据呈现明显{direction}(偏度 = {skewness:.3f})")
            if abs(kurtosis) > 1:
                direction = "尖峰" if kurtosis > 0 else "平峰"
                print(f" ⚠ 注意: 数据呈现明显{direction}分布(峰度 = {kurtosis:.3f})")
        print(f"{'='*60}")

    return pd.DataFrame(results, columns=_RESULT_COLUMNS)
# Significance level shared by the tests and by the explanatory text printed
# at the end, so the two can never drift apart.
ALPHA = 0.05

# Load the data set
data = pd.read_excel('ANOVA数据分析.xlsx')

# Variables to test (adjust to the actual column names in the workbook)
variables_to_check = [
    '情感值',  # emotional response (dependent variable)
    '吸引力',  # perceived restorativeness, dimension 1
    '延展性',  # perceived restorativeness, dimension 2
    '远离',  # perceived restorativeness, dimension 3
    '兼容性',  # perceived restorativeness, dimension 4
]

# Run the normality checks
print("开始进行正态性检验...")
normality_results = comprehensive_normality_check(
    data, variables_to_check, alpha=ALPHA
)

# Summary table
print("\n\n正态性检验结果汇总:")
print("=" * 100)
if normality_results.empty:
    # Nothing was tested: every requested column was missing or too short.
    # Indexing the result by column name would raise KeyError in this case.
    print("(无结果) 指定的变量均不在数据中, 或有效观测值不足 4 个, 请检查列名")
    normal_vars, non_normal_vars = [], []
else:
    print(normality_results.to_string(index=False))
    verdict = normality_results['是否正态']
    normal_vars = normality_results.loc[verdict == '是', '变量'].tolist()
    non_normal_vars = normality_results.loc[verdict == '否', '变量'].tolist()
print("=" * 100)

# Recommend a family of statistical methods based on the verdicts
print("\n统计方法建议:")
print(f"服从正态分布的变量: {', '.join(normal_vars) if normal_vars else '无'}")
print(f"不服从正态分布的变量: {', '.join(non_normal_vars) if non_normal_vars else '无'}")
if not normal_vars and not non_normal_vars:
    print("⚠ 没有可用的检验结果, 无法给出统计方法建议")
elif not non_normal_vars:
    print("✅ 所有变量都服从正态分布,可以使用参数检验方法:")
    print(" - 多元线性回归分析您的H2和H2a,b,c假设")
elif normal_vars:
    print("⚠ 部分变量不服从正态分布,建议:")
    print(" - 对非正态变量进行数据转换")
    print(" - 或使用广义线性模型(GLM)")
    print(" - 或使用非参数检验方法")
else:
    print("❌ 所有变量都不服从正态分布,建议:")
    print(" - 对所有变量进行数据转换")
    print(" - 或使用非参数统计方法")
    print(" - 或使用稳健回归方法")

# Explain the decision criteria used above
print("\n正态性判断标准说明:")
print(f"1. Shapiro-Wilk检验: p > {ALPHA} 表示不能拒绝正态性原假设")
print(f"2. D'Agostino检验: p > {ALPHA} 表示不能拒绝正态性原假设")
print("3. 偏度绝对值 < 0.5: 近似对称分布")
print("4. 偏度绝对值 0.5-1: 轻度偏态")
print("5. 偏度绝对值 > 1: 明显偏态")
print("6. 峰度绝对值 < 0.5: 中等峰度")
print("7. 峰度绝对值 0.5-1: 轻度尖峰/平峰")
print(f"8. 峰度绝对值 > 1: 明显尖峰/平峰")

我运行这一段代码,出现下面这个结果是什么原因?
“C:\Users\HUAWEI\miniconda3\python.exe D:\python-learning\解说词元素分析\文件排序.py
开始进行正态性检验...
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 24773 (\N{CJK UNIFIED IDEOGRAPH-60C5}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 24863 (\N{CJK UNIFIED IDEOGRAPH-611F}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 20540 (\N{CJK UNIFIED IDEOGRAPH-503C}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 20559 (\N{CJK UNIFIED IDEOGRAPH-504F}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 24230 (\N{CJK UNIFIED IDEOGRAPH-5EA6}) missing from font(s) DejaVu Sans.
plt.tight_layout()
D:\python-learning\解说词元素分析\文件排序.py:78: UserWarning: Glyph 23792 (\N{CJK UNIFIED IDEOGRAPH-5CF0}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 24773 (\N{CJK UNIFIED IDEOGRAPH-60C5}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 24863 (\N{CJK UNIFIED IDEOGRAPH-611F}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 20540 (\N{CJK UNIFIED IDEOGRAPH-503C}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 20998 (\N{CJK UNIFIED IDEOGRAPH-5206}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 24067 (\N{CJK UNIFIED IDEOGRAPH-5E03}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 22270 (\N{CJK UNIFIED IDEOGRAPH-56FE}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 20559 (\N{CJK UNIFIED IDEOGRAPH-504F}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 24230 (\N{CJK UNIFIED IDEOGRAPH-5EA6}) missing from font(s) DejaVu Sans.
func(*args)
C:\Users\HUAWEI\miniconda3\Lib\tkinter\__init__.py:861: UserWarning: Glyph 23792 (\N{CJK UNIFIED IDEOGRAPH-5CF0}) missing from font(s) DejaVu Sans.
func(*args)
”