import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import adfuller as ADF
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
# Univariate time-series analysis with ARIMA.
data = pd.read_csv("NZAccomodation.csv")
# Convert DATE itself to datetimes before indexing by it. The earlier version
# wrote the parsed dates to a scratch column that was dropped on the next
# line, so the index stayed as unparsed strings.
data['DATE'] = pd.to_datetime(data['DATE'])
data = data.set_index('DATE')
ts = data['BackPackers']
print(data.describe())
print(data.info())
# SimHei supplies the CJK glyphs used in titles and legends; turning off
# axes.unicode_minus is the usual companion setting so minus signs still
# render with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
ts.plot(color='green', marker='o', linestyle='dashed', linewidth=1)
plt.ylabel('BackPackers')
plt.title('BackPackers时间序列分析图')
plt.show()
# Draw the ACF (autocorrelation) and PACF (partial autocorrelation) plots.
def Acf(data):
    """Show the ACF (top) and PACF (bottom) of `data`, each out to lag 31."""
    figure = plt.figure(facecolor='white')
    # Row 1 holds the ACF, row 2 the PACF; each axes is created right before
    # its plot is drawn.
    for row, draw in ((1, plot_acf), (2, plot_pacf)):
        draw(data, lags=31, ax=figure.add_subplot(2, 1, row))
    plt.show()


Acf(ts)
# ADF (Augmented Dickey-Fuller) stationarity test.
def Adf_diy(data):
    """Run the ADF test on `data` (lag picked by BIC) and print its first four results."""
    labels = ['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used']
    # Only the first four entries of the result are reported; anything after
    # them is ignored here.
    statistic, p_value, used_lag, n_obs = ADF(data, autolag='BIC')[0:4]
    summary = pd.Series((statistic, p_value, used_lag, n_obs), index=labels)
    print("检验结果:")
    print(summary)


# Recorded result: p is about 0.963779, well above 0.05, so the series is
# judged non-stationary and therefore cannot be a white-noise series.
Adf_diy(ts)
# Take the first difference and plot it against the original series.
ts_diff1 = ts.diff(periods=1)
print(ts_diff1)
f = plt.figure(facecolor='white')
diff_axes = ts.plot(color='green', label='原始数据')
ts_diff1.plot(ax=diff_axes, color='blue', label='1阶差分')
diff_axes.legend()
plt.show()
# Check whether the once-differenced series is stationary.
ts_diff1 = ts_diff1.dropna()  # differencing leaves a NaN in the first position
Acf(ts_diff1)  # draw the ACF and PACF plots
Adf_diy(ts_diff1)  # recorded result: p is about 0.000008, judged stationary
# White-noise (Ljung-Box) test.
# Several lags can be inspected at once instead, e.g.
# acorr_ljungbox(ts_diff1, lags=[6, 12]); p < 0.05 means not white noise.
def LB_test(timeseries):
    """Print whether `timeseries` looks like white noise per a lag-1 Ljung-Box test.

    A p-value below 0.05 is reported as "not white noise"; anything else is
    reported as white noise. Nothing is returned.
    """
    result = acorr_ljungbox(timeseries, lags=1)
    # Newer statsmodels releases return a DataFrame with `lb_stat` and
    # `lb_pvalue` columns; older ones return a (statistics, p-values) tuple
    # of arrays. Tuple-unpacking a DataFrame iterates its column labels and
    # raises, so the p-value is read explicitly for each shape.
    if isinstance(result, pd.DataFrame):
        p = result['lb_pvalue'].iloc[0]
    else:
        p = result[1][0]
    if p < 0.05:
        print("原始序列为非白噪声序列")
    else:
        print("原始序列为白噪声序列")


LB_test(ts_diff1)  # recorded result: the differenced series is not white noise
# ARIMA(p, d, q): p autoregressive lags, q moving-average lags, d-th order
# differencing.
# Recommend the best model order according to the BIC criterion.
def SelectModel(data):
    """Fit ARMA(p, q) for p in 0..1 and q in 0..2 and report the lowest-BIC fit.

    The BIC of every candidate is printed as it is fitted, followed by the
    summary of the selected model.

    Args:
        data: the series to model (the script passes the differenced series).

    Returns:
        The fitted results object with the lowest BIC.
    """
    # NOTE(review): sm.tsa.ARMA is the legacy statsmodels API; confirm the
    # installed statsmodels release still provides it.
    best_model = None
    for i in range(2):
        for j in range(3):
            # Each order is fitted exactly once; (0, 0) is simply the first
            # candidate rather than a separately fitted seed model.
            arma_mod = sm.tsa.ARMA(data, (i, j)).fit()
            print('当p和q为,相应的bic值为:', i, j, arma_mod.bic)
            # Strict comparison: on a BIC tie the earlier order is kept.
            if best_model is None or best_model.bic > arma_mod.bic:
                best_model = arma_mod
    print('根据bic准则,选出最优的模型为:', best_model.summary2())
    return best_model


SelectModel(ts_diff1)
# Build the ARIMA(1, 1, 2) model.
n_test = 10
train = ts[:-n_test]  # everything except the last 10 observations
test = ts[-n_test:]   # the last 10 observations, held out to check the forecast
# Order chosen from the ACF/PACF plots and the differencing step.
# Fit on the training slice only. Previously the model was fitted on the full
# series, so the points reserved for prediction were part of the fit and the
# train/test split went unused.
model = ARIMA(train.astype(float), order=(1, 1, 2))
arima_result = model.fit()
# Inspect the residuals: their trace and their kernel density estimate.
residuals = pd.DataFrame(arima_result.resid)
fig, ax = plt.subplots(1, 2)
residuals.plot(title="Residuals", ax=ax[0])
residuals.plot(kind='kde', title='Density', ax=ax[1])
plt.show()
# Summary of the fitted model.
print(arima_result.summary2())
# Forecast the held-out period so it can be compared with the actual values.
arima_result.plot_predict(dynamic=False)
fc, se, conf = arima_result.forecast(len(test))
print("预测数据:\n", fc)
print("实际数据:\n", test.values)
plt.legend()
plt.show()