#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 27 11:06:48 2019
@author: vicky
"""
import numpy as np
from numpy import linalg as la
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
import statsmodels.tsa.stattools as st
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
import copy
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
import warnings
from statsmodels.stats.diagnostic import acorr_ljungbox
import arch
from arch import arch_model
import datetime as dt
#data=pd.read_csv('/Users/vicky/Downloads/国债数据.csv')
def tsfunction(data):
xtrain=np.array(data.ix[:,1:])
#看哪些列包含nan
np.where(np.isnan(xtrain))[1]
#去除包含nan的列
xtrain=np.delete(xtrain,np.where(np.isnan(xtrain))[1],1)
#-------------PCA降维到3个主成分--------
(n,p)=np.shape(xtrain)
Xsta=(xtrain-np.tile(np.mean(xtrain,axis=0),(n,1)))/np.tile(np.std(xtrain,axis=0),(n,1));#标准化样本数据 X
U,D,V=la.svd(Xsta)#SVD分解
D=np.asmatrix(D).T
#lam=np.multiply(D,D)/(n-1) #特征根
W=V.T/np.tile(np.sum(abs(V.T),axis=0),(p,1)) #标准化的特征向量
#f=lam/sum(lam)#方差贡献率=特征值/所有特征值总和
#检验是否可以主成分降维
#F=np.cumsum(lam)/sum(lam)#累计方差贡献率=前i个特征值总和/所有特征值总和
#Index=np.where(F[0,:]>0.95)#取累计方差贡献达到95%的q个主成分
#q=np.array(Index)[1,0]+1 #主成分个数
Xpc=np.dot(xtrain,W) #降维后的xpc矩阵=标准化后的特征向量*原x矩阵
pc=Xpc[:,0:3]#主成分矩阵=降维后x矩阵的前3列
#-------------时间序列-----------------
df=pd.DataFrame(pc)
df.columns=['level','slope','curvation']
df.index = data['trading_date']
#生成pd.Series对象
ts_level=pd.Series(df['level'].values, index=df.index)
ts_slope=pd.Series(df['slope'].values, index=df.index)
ts_curvation=pd.Series(df['curvation'].values, index=df.index)
#------------level,arima(1,1,3)
#一阶差分
ts_level_d = ts_level.diff().dropna()
ts_level_d.columns = ['diff']
#建模计算拟合值
def arima_ts_level(ts,ts_d,p,d,q):
model = ARIMA(ts, order=(p,d,q)) #ARIMA(p,d,q)
results_AR = model.fit(disp=-1) #disp为-1代表不输出收敛过程的信息,True代表输出
#差分还原
predictions_diff = pd.Series(results_AR.fittedvalues, copy=True)
predictions_diff_cumsum = predictions_diff.cumsum() #从第一个开始累计想加
predictions = pd.Series(ts.ix[0], index=ts.index)
predictions = predictions.add(predictions_diff_cumsum,fill_value=0)
return (predictions,results_AR)
pre_level,results_AR_level=arima_ts_level(ts_level,ts_level_d,1,1,3)
#-----------slope,ar(1)-arch(4)
ts_slope_d = ts_slope.diff().dropna()
ts_slope_d.columns = ['diff']
#建模计算拟合值,均值模型为AR(1)模型,波动率模型选择ARCH(4)模型
def arch_ts_slope(ts,ts_d,n_ar,n_arch):
am_ARCH = arch.arch_model(ts, mean='AR', lags=n_ar, vol='ARCH', p=n_arch)
res_ARCH = am_ARCH.fit()
#计算拟合值
pre_ARCH = res_ARCH.forecast(horizon=1,start=1,method='simulation')
pre_mean=pre_ARCH.mean
pre=pd.Series(pre_mean['h.1'], index=ts_slope_d.index)
#差分还原
pres_d = pd.Series(pre, copy=True)
pres_d_cumsum = pres_d.cumsum() #从第一个开始累计想加
pres = pd.Series(ts_slope.ix[0], index=ts_slope.index)
pres = pres.add(pres_d_cumsum,fill_value=0)
return(pres,res_ARCH)
pre_slope,results_ARCH_slope=arch_ts_slope(ts_slope,ts_slope_d,1,4)
#----curvation,arima(5,1,5)
ts_curvation_d = ts_curvation.diff().dropna()
ts_curvation_d.columns = ['diff']
#建模计算拟合值,用arima包会报错,改用sarimax包
def arima_ts_curvation(ts,ts_d,p,d,q):
model = sm.tsa.statespace.SARIMAX(ts_d,order=(p,0,q),freq='D',seasonal_order=(0,0,0,0),
enforce_stationarity=False, enforce_invertibility=False).fit()
#We can use SARIMAX model as ARIMAX when seasonal_order is (0,0,0,0) .
#注意这里用的是ts_curvation_d算,因为SARIMAX包的fittedvalues直接给出原始拟合值而不是差分拟合值
#差分还原
predictions_d = pd.Series(model.fittedvalues, copy=True)
predictions_d_cumsum = predictions_d.cumsum() #从第一个开始累计想加
predictions = pd.Series(ts.ix[0], index=ts_curvation.index)
predictions = predictions.add(predictions_d_cumsum,fill_value=0)
return(predictions,model)
pre_curvation,results_AR_curvation=arima_ts_curvation(ts_curvation,ts_curvation_d,5,1,5)
return(pre_level,pre_slope,pre_curvation)