python 时间序列预测——多序列预测

最新推荐文章于 2025-10-29 21:02:48 发布

原创最新推荐文章于 2025-10-29 21:02:48 发布 · 2.8k 阅读

12 ·

CC 4.0 BY-SA版权

我们不生产知识，我们只是互联网的搬运工

编程语言同时被 2 个专栏收录

240 篇文章

订阅专栏

时间序列

90 篇文章

订阅专栏

本文介绍了一种使用 Python 和机器学习技术预测股票指数波动率的方法：下载并解析股票指数数据，以收益率的滚动标准差作为波动率，引入时滞特征并做归一化，再利用 SimpleRNN 模型进行训练，最终同时预测富时100指数和道琼斯工业指数的波动率。

数据集

股票指数，STOCKINDEX

import numpy as np
import pandas as pd
import urllib.request as request
import zipfile

# Download the stock-index archive to the working directory.
url = "http://www.economicswebinstitute.org/data/stockindexes.zip"
loc = "./stockindexes.zip"
request.urlretrieve(url, loc)

# Unpack the archive into the working directory. The context manager closes
# the zip handle even when extraction raises, which a manual close() call
# placed after extractall() does not guarantee.
dest_location = "./"
with zipfile.ZipFile(loc, 'r') as unzip:
    unzip.extractall(dest_location)

# Open the extracted workbook and list its sheets.
loc = "stockindexes.xls"
Excel_file = pd.ExcelFile(loc)
print(Excel_file.sheet_names)
'''
['Description', 'Dow Jones Industrial', 'S&P500', 'NIKKEI 300', 'Dax30', 'CAC40', 'Swiss Market-Price Index', 'Mib30', 'IBEX 35I', 'Bel20', 'FTSE100']
'''

预测对象

主要关注两个指数：FTSE100【富时100指数】，Dow Jones Industrial【道琼斯工业指数】

# Read the two sheets of interest from the already-opened workbook.
ftse_data = Excel_file.parse('FTSE100')
dj_data = Excel_file.parse('Dow Jones Industrial')

# Positional rows 4..1356 of the second column of each sheet.
ftse100 = ftse_data.iloc[4:1357, 1]
dj = dj_data.iloc[4:1357, 1]

# Put both series side by side on a fresh 0-based index, then convert the
# levels into one-step percentage changes.
yt = pd.concat([ftse100, dj], axis=1).reset_index(drop=True)
yt.columns = ['ftse100', 'dj']
yt = yt.pct_change(1)

# Rolling standard deviation of the changes over a centered window of `win`
# observations.
# NOTE(review): center=True means each window also covers later observations.
win = 30
vol_t = yt.rolling(window=win, center=True).std()
print(vol_t.shape)

# NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not
# visible in this file.
plt.figure(figsize=(9, 3))
plt.plot(vol_t)

（图：FTSE100 与道琼斯工业指数收益率的 30 期滚动标准差，即上文代码绘制的波动率曲线）

预处理

引入时滞

# Lagged predictors: for k = 2..6, log((v[t-1] / v[t-k]) * v[t-1]), computed
# column-wise on both volatility series at once.
lag1 = vol_t.shift(1)
x1, x2, x3, x4, x5 = (
    np.log((lag1 / vol_t.shift(k)) * lag1) for k in range(2, 7)
)

# Column labels. The embedded and trailing spaces are part of the labels and
# are used verbatim for the lookups below.
cols_y = ['ftse_t ', 'dj_t ']
cols_x = [
    '{}_t -{} '.format(index_name, lag)
    for lag in range(1, 6)
    for index_name in ('ftse', 'dj')
]

# Targets first, then the five predictor pairs; rows with any NaN (introduced
# by the rolling window and the shifts) are dropped.
data = pd.concat([vol_t, x1, x2, x3, x4, x5], axis=1)
data.columns = cols_y + cols_x
data = data.dropna()

y = data[cols_y]
x = data[cols_x]

归一化

# NOTE(review): `preprocessing` is presumably sklearn.preprocessing; its import
# is not visible in this file.
# NOTE(review): both scalers are fitted on every row of x / y, so if a
# train/test split happens afterwards the test rows influence the scaling.

# Predictors: coerce to a (rows, num_attrib) array and scale into [-1, 1].
num_attrib = 10
scaler_x = preprocessing.MinMaxScaler(feature_range=(-1, 1))
x = scaler_x.fit_transform(np.array(x).reshape((len(x), num_attrib)))

# Responses: coerce to a (rows, num_response) array and scale into [0, 1].
num_response = 2
scaler_y = preprocessing.MinMaxScaler(feature_range=(0, 1))
y = scaler_y.fit_transform(np.array(y).reshape((len(y), num_response)))

数据集拆分

# Chronological split: the first `train_end` rows train the model and every
# remaining row is held out for testing.
train_end = 1131
data_end = len(y)

# The test slice starts AT train_end. Python slices exclude the stop index, so
# starting at train_end + 1 would silently drop the row at index train_end from
# both sets.
x_train = x[:train_end]
x_test = x[train_end:data_end]
y_train = y[:train_end]
y_test = y[train_end:data_end]

# Insert a length-1 middle axis: (rows, features) -> (rows, 1, features).
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
print(" Shape of x_train is ", x_train.shape)  # (1131, 1, 10) in the original article
print(" Shape of x_test is ", x_test.shape)  # one row more than the (185, 1, 10) reported before the fix

训练

# NOTE(review): Sequential, SimpleRNN, Dense and SGD are presumably the Keras
# classes of the same names; their imports are not visible in this file.

seed = 2016
num_epochs = 20
# Seeds NumPy's global RNG only.
# NOTE(review): the Keras backend may keep its own RNG state; confirm whether
# it needs separate seeding for reproducible runs.
np.random.seed(seed)

# One recurrent layer of 10 sigmoid units over inputs of shape (1, num_attrib),
# followed by a linear layer with one output per response.
model = Sequential()
model.add(SimpleRNN(units=10, activation='sigmoid', input_shape=(1, num_attrib)))
model.add(Dense(units=num_response, activation='linear'))

# `learning_rate` replaces the deprecated `lr` keyword, which current Keras
# releases reject.
sgd = SGD(learning_rate=0.01, momentum=0.90, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd)
model.fit(x_train, y_train, batch_size=1, epochs=num_epochs)

# No extra metrics are compiled, so each evaluate() result is passed to
# round() below as a single loss value.
score_train = model.evaluate(x_train, y_train, batch_size=1)
score_test = model.evaluate(x_test, y_test, batch_size=1)
print("in train MSE = ", round(score_train, 5))
print("in test MSE = ", round(score_test, 5))

预测

# Predict with the trained network. The fitted object is `model`; `fit1` is
# never defined in this script and raised a NameError.
pred = model.predict(x_test)
# y_test and pred are both on the scaled response scale, so they are directly
# comparable. To report original units instead, pass both through
# scaler_y.inverse_transform before plotting.

# One figure per response column: actual series vs. prediction.
# NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not
# visible in this file.
for col, label in enumerate(['FTSE100', 'Dow Jones']):
    plt.plot(y_test[:, col])
    plt.plot(pred[:, col])
    plt.legend([label, label + ' prediction'])
    plt.show()