# -*- coding: utf-8 -*-
"""
Created on Sat Jul 28 13:40:57 2018
@author: wangxihe
"""
#%%
import pandas as pd
import statsmodels.api as sm
import os
import numpy as np
import matplotlib.pyplot as plt
# The relative CSV path used below is resolved against this working directory.
os.chdir(r'E:\spyderwork\wxh\数据科学B\样本检验')
#%% Hypothesis testing and one-sample t-test
# Question: how plausible is a 10% house-price growth rate, and what is the
# confidence interval? The growth rate must not exceed 10%, otherwise someone
# is held accountable.
onedata=pd.read_csv('house_price_gr.csv',encoding='gbk')
# Printed explicitly: a bare expression is silently discarded when the cell or
# file is executed as a whole rather than typed into a console.
print(onedata.describe(include='all'))
# Flag rows that repeat an earlier row (True = duplicate).
onedata['duplicated']=onedata.duplicated()
# Non-duplicates are False (0) and duplicates True (1), so after adding 1 to
# every flag the total equals len(onedata) only when no row is duplicated.
print((onedata['duplicated']+1).sum()==len(onedata))

# Normality diagnostics for the growth rate.
# sm.qqplot opens a figure of its own when no `ax` is passed, but the pandas and
# seaborn calls below draw into whatever axes is current. Without a fresh
# figure for each of them, every plot would be stacked on top of the Q-Q plot.
qq=sm.qqplot(onedata['rate'] ,fit=True, line='45')  # Q-Q plot with 45-degree reference line
plt.figure()
onedata['rate'].hist(bins=30)  # histogram, 30 bins
plt.figure()
onedata['rate'].plot(kind='box')  # box plot
import seaborn as sns
from scipy import stats
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11; migrate to
# histplot/displot when the seaborn version is upgraded.
plt.figure()
sns.distplot(onedata.rate, kde=True,fit=stats.norm)  # histogram + KDE + fitted normal curve
plt.figure()
sns.distplot(onedata.rate, kde=True,rug=True,hist=True, fit=stats.norm)  # same, plus rug marks
#%%#置信区间
u=onedata.rate.mean()
S=onedata.rate.std(