# -*- coding: utf-8 -*-
"""
Created on Sun Jul 21 14:26:22 2019
@author: User
"""
# 《Python数据分析基础》中国统计出版社
import numpy as np
from scipy import stats
import pandas as pd
#import statsmodels.api as sm
#import statsmodels.formula.api as smf
#import matplotlib.pyplot as plt
#from statsmodels.stats.multicomp import pairwise_tukeyhsd
#from statsmodels.graphics.api import interaction_plot
from matplotlib.font_manager import FontProperties
# Font handle built from a bundled .ttc file; nothing in the visible code uses
# it yet (the plotting imports above are commented out).
# A forward slash is used because '\m' is an invalid escape sequence and a
# backslash is only a path separator on Windows; '/' works on every platform.
myfont = FontProperties(fname='data/msyh.ttc')

# Load the data set; the file is decoded as GBK.
water = pd.read_csv(u'data/ch8/water.csv', encoding="gbk")

# Sample median of the 'Net' column, for comparison with the hypothesised
# value tested below.
print(water['Net'].median())
def wilcoxon_signed_rank_test(samp, mu0=0):
    """Two-sided one-sample Wilcoxon signed-rank test (normal approximation).

    Tests the null hypothesis that the sample is symmetric around ``mu0``.

    Parameters
    ----------
    samp : array-like
        Numeric observations (1-D, or a single column).
    mu0 : float, optional
        Hypothesised centre of the distribution. Defaults to 0.

    Returns
    -------
    tuple of (float, float)
        ``(Z, P)``: the standardised sum of positive ranks and the two-sided
        p-value from the standard normal distribution. Both are NaN when
        no observation differs from ``mu0``.

    Notes
    -----
    Observations equal to ``mu0`` (and missing values) are discarded *before*
    ranking, so the ranks run over exactly the ``n`` values that enter the
    mean ``n(n+1)/4`` and variance ``n(n+1)(2n+1)/24``. Tied absolute
    differences receive average ranks; no tie correction is applied to the
    variance.
    """
    diffs = pd.Series(np.asarray(samp).ravel()) - mu0

    # Keep strictly positive or strictly negative differences only; this
    # removes zeros and NaN in a single step (NaN compares False both ways).
    diffs = diffs[(diffs > 0) | (diffs < 0)]
    n = len(diffs)
    if n == 0:
        # Nothing to rank: the statistic is undefined.
        return float('nan'), float('nan')

    ranks = diffs.abs().rank()
    pos_w = ranks[diffs > 0].sum()

    expected = n * (n + 1) / 4
    std_dev = np.sqrt(n * (n + 1) * (2 * n + 1) / 24)
    z = (pos_w - expected) / std_dev
    p = (1 - stats.norm.cdf(abs(z))) * 2

    # Plain floats keep the printed tuple identical across NumPy versions.
    return float(z), float(p)
# Signed-rank test on the 'Net' column against a hypothesised centre of 600;
# prints the (Z, P) tuple returned by the test.
net_test_result = wilcoxon_signed_rank_test(water['Net'], mu0=600)
print(net_test_result)
# Output:
# (-1.9940749174328372, 0.04614386788589431)