获取数据:
# -*- coding: utf-8 -*-
import pandas as pd
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import xlrd
# Load the first column of the first worksheet into a one-column DataFrame.
# NOTE(review): xlrd 2.0+ only reads legacy .xls workbooks and rejects
# .xlsx files -- confirm the installed xlrd version can open this path.
path = xlrd.open_workbook("C:\\Users\\Administrator\\Downloads\\data.xlsx")
table = path.sheets()[0]
nrows = table.nrows
print(nrows)
# One entry per worksheet row: the cell value found in column 0.
data_ = [table.row_values(i)[0] for i in range(nrows)]
data2 = pd.DataFrame(data_)
print(data2)
集中趋势的度量:
# Measures of central tendency for the single data column of data2.
# The column is pulled out as a Series so that every statistic is a plain
# scalar; formatting a one-element Series/array with %d or %.2f relies on
# an implicit conversion that recent pandas and NumPy releases deprecate.
col = data2.iloc[:, 0]
# mode() may return several values; the first (smallest) one is reported.
print("众数: %d" % col.mode().iloc[0])
print("中位数: %d" % col.median())
print("下四分位数: %d" % col.quantile(.25))
print("上四分位数: %d" % col.quantile(.75))
print("算数平均数:%.2f" % col.mean())
# The data is ungrouped, so no weighted mean is computed.
print("几何平均数:%.2f" % stats.gmean(col))
离散程度的度量:
# Measures of dispersion for the single data column of data2.
col = data2.iloc[:, 0]
# Variation ratio: the share of observations that are NOT the modal value.
# value_counts() sorts by frequency (descending), so the first entry is the
# number of occurrences of the mode.
mode_count = col.value_counts().iloc[0]
print("异众系数 " + str(1 - mode_count / len(col)))
print("四分位差:%d" % (col.quantile(.75) - col.quantile(.25)))
print("极差:%d" % (col.max() - col.min()))
# Mean absolute deviation: average distance of each observation from the
# mean. The sum runs over the column's values (iterating the DataFrame
# itself would walk its column labels instead).
M = (col - col.mean()).abs().sum()
print(M / len(col))
# Population variance and standard deviation (ddof=0, the NumPy default).
print("方差:%d" % col.var(ddof=0))
print("标准差:%d" % col.std(ddof=0))
# Interquartile range again, computed with NumPy percentiles and shown
# without integer truncation.
print("四分位差:%.2f" % (np.percentile(col, 75) - np.percentile(col, 25)))
# Coefficient of variation. Note that Series.std() defaults to the sample
# standard deviation (ddof=1), unlike the population value printed above.
print("离散系数:%.4f" % (col.std() / col.mean()))
偏度和峰度的度量:
# Shape of the distribution: skewness and kurtosis of the data column.
from scipy import stats

# Work on the column as a Series so both statistics come back as scalars.
col = data2.iloc[:, 0]
# Skewness
print("偏度:%.4f" % stats.skew(col))
# Kurtosis (scipy's default is Fisher's definition: a normal distribution
# scores 0)
print("峰度:%.4f" % stats.kurtosis(col))