效果
原理
LSTM即长短期记忆网络(Long Short-Term Memory),是一种功能很强的RNN。这种网络的特性是:之前的输入会影响当前的输出。具体原理请自行搜索。
算法流程:
- 从Yahoo财经获取股价数据,并进行预处理
- 搭建一个简单网络,前面是LSTM用来回归,后面是全连接用来输出
- 测试,可视化处理
代码
注释很详细(其实也没啥要说的)
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM
#解决中文乱码及符号问题 别在意
matplotlib.rcParams['font.family'] = 'simHei'
matplotlib.rcParams['axes.unicode_minus'] = False
#这行是因为我用jupyternotebook 希望plt是单独一个窗口
#如果希望plt嵌在命令行改用 %matplotlib inline
%matplotlib qt5
# ---------------------------------------------------------------------------
# Data acquisition and preprocessing
# ---------------------------------------------------------------------------
# Company to model: Facebook.
company = 'FB'
# Date range of the training data.
start = dt.datetime(2012, 1, 1)
end = dt.datetime(2020, 1, 1)
# NOTE(review): the 'yahoo' reader and the 'FB' ticker may no longer resolve
# with current pandas_datareader / Yahoo Finance -- verify before running.
data = web.DataReader(company, 'yahoo', start, end)

# Normalize the closing prices ('Close' column) into the range [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = data['Close'].values.reshape(-1, 1)
scaled_data = scaler.fit_transform(close_column)

# Use a window of 60 consecutive samples to predict the next one.
# Example: x_train[0] holds samples 0..59 and y_train[0] is sample 60.
prediction_days = 60
window_ends = range(prediction_days, len(scaled_data))
x_train = np.array(
    [scaled_data[stop - prediction_days:stop, 0] for stop in window_ends]
)
y_train = np.array([scaled_data[stop, 0] for stop in window_ends])
# Add a trailing feature axis: (samples, prediction_days, 1).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
# ---------------------------------------------------------------------------
# Model definition and training
# ---------------------------------------------------------------------------
# Three stacked LSTM layers, each followed by dropout, then a single-unit
# fully connected layer that produces the output value.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])
# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the network.
model.fit(x_train, y_train, epochs=25, batch_size=32)
# ---------------------------------------------------------------------------
# Testing
# ---------------------------------------------------------------------------
# Download the test period: from 2020-01-01 up to the current time.
test_start = dt.datetime(2020, 1, 1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values

# Join training and test closing prices so that the first test window can
# reach back into the last `prediction_days` training samples.
total_dataset = pd.concat([data['Close'], test_data['Close']], axis=0)

# Model inputs: the final `prediction_days` training samples followed by all
# test samples, scaled with the scaler that was fitted on the training data.
first_input = len(data) - prediction_days
model_inputs = total_dataset.iloc[first_input:].values.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)

# One sliding window per test sample, shaped (samples, prediction_days, 1).
x_test = np.array([
    model_inputs[stop - prediction_days:stop, 0]
    for stop in range(prediction_days, len(model_inputs))
])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict, then map the scaled predictions back to price units.
predicted_prices = scaler.inverse_transform(model.predict(x_test))
# ---------------------------------------------------------------------------
# Visualization
# ---------------------------------------------------------------------------
length = len(predicted_prices)
# Put a tick on every 50th sample and label it with that sample's own date
# from the test data index. The previous version counted calendar days from a
# hard-coded 2020-06-01, which matched neither the test start date
# (2020-01-01) nor the spacing of the plotted samples.
# Assumes test_data.index is a DatetimeIndex aligned one-to-one with
# predicted_prices -- TODO confirm if the data source changes.
tick_positions = list(range(0, length, 50))
ticks = [test_data.index[pos].strftime("%Y-%m-%d") for pos in tick_positions]

plt.plot(actual_prices, c='b', label='实际')
plt.plot(predicted_prices, c='r', label='预测结果')
plt.title('脸书股价预测')
plt.xlabel('Time')
plt.ylabel('Prices')
# `bottom` replaces the `ymin` keyword, which newer Matplotlib rejects.
plt.ylim(bottom=0)
plt.xticks(tick_positions, ticks)
plt.legend()
plt.show()
应用
如果你想预测明天的股价,可以用下面的代码
# Predict the next value from the most recent `prediction_days` scaled inputs.
last_window = model_inputs[-prediction_days:, 0]
# Shape the window as a single batch entry: (1, window_length, 1).
tmr_data = last_window[np.newaxis, :, np.newaxis]
# Convert the scaled prediction back to a price and print it.
ans = scaler.inverse_transform(model.predict(tmr_data))
print(ans)
输出
[[223.61644]]