股票价格预测
数据集 代码
目标
1)用长短时记忆网络(LSTM),根据此前 60 天的收盘价预测下一天的收盘价
2)可视化
1 预测
第一步: 导入模块
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10
from keras.models import Sequential
# Network layers: LSTM (long short-term memory), Dropout, and Dense (fully connected)
from keras.layers import LSTM, Dropout,Dense
# 用于标准化
from sklearn.preprocessing import MinMaxScaler
第二步 读取数据,并处理,用到数据表里面的时间和收盘价格
# Load the raw quotes into a DataFrame (a dict-like collection of Series).
df = pd.read_csv("NSE-Tata-Global-Beverages-Limited.csv")
# Notebook leftover: displays the first rows interactively, has no effect in a script.
df.head()
# Parse the Date column as year-month-day timestamps.
df["Date"] = pd.to_datetime(df.Date, format="%Y-%m-%d")
# Use the date as the row index.
df.index = df['Date']
# Figure size for the closing-price history plot.
plt.figure(figsize=(16,16))
# Draw the closing-price curve.
plt.plot(df['Close'],label='Close Prise history')
# plt.show()
# Sort the rows chronologically (ascending by the date index).
data = df.sort_index(ascending=True,axis=0)
# Keep only Date and Close, renumbered 0..n-1 in chronological order.
# This replaces the former row-by-row chained assignment
# (new_dataset['Date'][i] = data['Date'][i]), which depended on positional
# integer lookup on a date-indexed Series and on chained assignment writing
# through to the parent frame.
new_dataset = data[['Date', 'Close']].reset_index(drop=True)
# Scaler that maps the feature into the range (0, 1).
scaler = MinMaxScaler(feature_range=(0,1))
# 2-D array holding the Date and Close columns.
final_dataset = new_dataset.values
# The first 987 rows form the training split ...
train_data = final_dataset[0:987,:]
# ... and the remaining rows form the validation split.
valid_data = final_dataset[987:,:]
# Re-index by date and drop the now redundant Date column.
new_dataset.index = new_dataset.Date
new_dataset.drop('Date', axis=1, inplace=True)
# Scale Close into (0, 1).
# NOTE(review): the scaler is fit on every row, including the validation
# rows, so validation statistics leak into the scaling — confirm this is
# acceptable for the tutorial.
scaled_data = scaler.fit_transform(new_dataset)
第三步 构建训练集和测试集
# Training samples: each sample is 60 consecutive scaled closing prices and
# its label is the scaled closing price of the row that follows the window.
x_train_data = np.array(
    [scaled_data[end - 60:end, 0] for end in range(60, len(train_data))]
)
y_train_data = np.array(
    [scaled_data[end, 0] for end in range(60, len(train_data))]
)
# Add a trailing feature axis: (samples, 60) -> (samples, 60, 1).
# That is 927 samples when the training split holds 987 rows.
x_train_data = x_train_data.reshape(x_train_data.shape[0], x_train_data.shape[1], 1)

# Validation rows plus the 60 rows that precede them, as a single column.
input_data = new_dataset[len(new_dataset) - len(valid_data) - 60:].values.reshape(-1, 1)
# Scale with the already-fitted scaler.
input_data = scaler.transform(input_data)

# Test samples: one 60-row window per validation row.
X_test = np.array(
    [input_data[end - 60:end, 0] for end in range(60, input_data.shape[0])]
)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
第四步 搭建模型
# Sequential model: two stacked LSTM layers followed by one Dense output unit.
# `units` is the output dimensionality of each LSTM layer; `input_shape` is
# (window length, 1 feature). The first LSTM returns the full sequence so the
# second LSTM can consume it.
lstm_model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train_data.shape[1], 1)),
    LSTM(units=50),
    Dense(1),
])
# Compile with mean-squared-error loss and the Adam optimizer.
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
第五步 训练、预测绘制结果
# Train for a single epoch, one sample per batch.
lstm_model.fit(x_train_data, y_train_data, epochs=1, batch_size=1, verbose=2)
# Predict the (scaled) closing price for every test window.
predicted_closing_price = lstm_model.predict(X_test)
# Map the predictions back to the original price scale.
predicted_closing_price = scaler.inverse_transform(predicted_closing_price)
# Persist the trained model so the dashboard script can load it.
lstm_model.save('saved_model.h5')
# Split the date-indexed Close series for plotting.
train_data = new_dataset[:987]
# Copy the validation slice so that adding a column does not write into a
# slice of new_dataset (avoids pandas' SettingWithCopy behaviour).
valid_data = new_dataset[987:].copy()
# Predictions come back as an (n, 1) array; flatten to one value per row.
valid_data['Predictions'] = predicted_closing_price.ravel()
# Plot training prices, then actual vs. predicted validation prices.
plt.plot(train_data['Close'])
plt.plot(valid_data[['Close', "Predictions"]])
plt.show()
2 可视化
用到 Dash 框架,需先在终端中安装(例如:pip install dash)
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10
from keras.models import Sequential
from keras.layers import LSTM, Dropout,Dense
# dash一个开源框架用于构建数据可视化交互界面
# dash初始化应用
import dash
# 创建交互组件如图,数据范围,dropdowns
import dash_core_components as dcc
# 获取HTML tags
import dash_html_components as html
import pandas as pd
import plotly.graph_objs as go
from dash.dependencies import Input, Output
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Create the Dash application.
app = dash.Dash()
# Expose the application's underlying server object under the conventional
# name `server`, which WSGI deployment commands typically reference.
server = app.server
# Backward-compatible alias for the original (misspelled) name.
sever = server
加载处理数据,加载模型并进行预测,同第一部分代码一样
# Load the raw quotes into a DataFrame (a dict-like collection of Series).
df = pd.read_csv("NSE-Tata-Global-Beverages-Limited.csv")
# Notebook leftover: displays the first rows interactively, has no effect in a script.
df.head()
# Parse the Date column as year-month-day timestamps.
df["Date"] = pd.to_datetime(df.Date, format="%Y-%m-%d")
# Use the date as the row index.
df.index = df['Date']
# Sort the rows chronologically (ascending by the date index).
data = df.sort_index(ascending=True,axis=0)
# Keep only Date and Close, renumbered 0..n-1 in chronological order.
# This replaces the former row-by-row chained assignment
# (new_dataset['Date'][i] = data['Date'][i]), which depended on positional
# integer lookup on a date-indexed Series and on chained assignment writing
# through to the parent frame.
new_dataset = data[['Date', 'Close']].reset_index(drop=True)
# Scaler that maps the feature into the range (0, 1).
scaler = MinMaxScaler(feature_range=(0,1))
# 2-D array holding the Date and Close columns.
final_dataset = new_dataset.values
# The first 987 rows form the training split ...
train_data = final_dataset[0:987,:]
# ... and the remaining rows form the validation split.
valid_data = final_dataset[987:,:]
# Re-index by date and drop the now redundant Date column.
new_dataset.index = new_dataset.Date
new_dataset.drop('Date', axis=1, inplace=True)
# Scale Close into (0, 1).
# NOTE(review): the scaler is fit on every row, including the validation
# rows, so validation statistics leak into the scaling — confirm this is
# acceptable for the tutorial.
scaled_data = scaler.fit_transform(new_dataset)
# Training samples: each sample is 60 consecutive scaled closing prices and
# its label is the scaled closing price of the row that follows the window.
x_train_data = np.array(
    [scaled_data[end - 60:end, 0] for end in range(60, len(train_data))]
)
y_train_data = np.array(
    [scaled_data[end, 0] for end in range(60, len(train_data))]
)
# Add a trailing feature axis: (samples, 60) -> (samples, 60, 1).
# That is 927 samples when the training split holds 987 rows.
x_train_data = x_train_data.reshape(x_train_data.shape[0], x_train_data.shape[1], 1)

# Validation rows plus the 60 rows that precede them, as a single column.
input_data = new_dataset[len(new_dataset) - len(valid_data) - 60:].values.reshape(-1, 1)
# Scale with the already-fitted scaler.
input_data = scaler.transform(input_data)

# Test samples: one 60-row window per validation row.
X_test = np.array(
    [input_data[end - 60:end, 0] for end in range(60, input_data.shape[0])]
)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
# Load the model saved by the training script and predict the validation prices.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# NOTE(review): tf.ConfigProto / tf.Session and keras.backend.tensorflow_backend
# are TensorFlow 1.x / standalone-Keras APIs — confirm the installed versions
# still provide them before running.
config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC' #A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
# Limit this process to 30% of GPU memory and let the allocation grow on demand.
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))
lstm_model = load_model('saved_model.h5')
lstm_model.summary()
# Predict on the test windows and map the result back to the price scale.
closing_price = lstm_model.predict(X_test)
closing_price = scaler.inverse_transform(closing_price)
train = new_dataset[:987]
# Copy the validation slice so that adding a column does not write into a
# slice of new_dataset (avoids pandas' SettingWithCopy behaviour).
valid = new_dataset[987:].copy()
# Predictions come back as an (n, 1) array; flatten to one value per row.
valid['Predictions'] = closing_price.ravel()
可视化
# Load the multi-stock quotes used by the second tab (this rebinds `df`).
# The callbacks read its Stock, Date, High, Low and Volume columns.
df = pd.read_csv('stock_data.csv')
app.layout = html.Div([
    html.H1("Stock Price Analysis Dashboard", style={"textAlign": "center"}),
    # Two tabs: the NSE prediction results and the multi-stock explorer.
    dcc.Tabs(id='tabs', children=[
        dcc.Tab(label='NSE-TATAGLOBAL STOCK DATA', children=[
            html.Div([
                # Two static graphs: actual vs. predicted validation prices.
                html.H2('Actual closing price', style={"textAlign": "center"}),
                dcc.Graph(id='Actual Data',
                          figure={
                              # x: validation dates, y: actual closing price.
                              # (Previously x used train.index, which does not
                              # line up with the validation prices on y.)
                              "data": [
                                  go.Scatter(x=valid.index, y=valid['Close'], mode='markers')
                              ],
                              "layout": go.Layout(title='scatter plot', xaxis={'title':'Date'}, yaxis={'title': 'Closing Rate'})
                          }
                          ),
                html.H2("LSTM Predicted closing price",style={"textAlign": "center"}),
                dcc.Graph(id="Predicted Data",
                          figure={
                              "data": [
                                  go.Scatter(x=valid.index, y=valid["Predictions"], mode='markers')
                              ],
                              "layout": go.Layout(title='scatter plot', xaxis={'title': 'Date'}, yaxis={'title': 'Closing Rate'})
                          }
                          )
            ])
        ]),
        dcc.Tab(label='Facebook Stock Data', children=[
            html.Div([
                # Two multi-select dropdowns; each one drives the graph below it.
                html.H1("Facebook Stocks High vs Lows", style={'textAlign': 'center'}),
                dcc.Dropdown(id='my-dropdown',
                             options=[{'label': 'Tesla', 'value': 'TSLA'},{'label': 'Apple','value': 'AAPL'},{'label': 'Facebook', 'value': 'FB'},{'label': 'Microsoft','value': 'MSFT'}],
                             multi=True, value=['FB'],
                             # camelCase style keys, consistent with textAlign above.
                             style={"display": "block", "marginLeft": "auto",
                                    "marginRight": "auto", "width": "60%"}
                             ),
                # Graph placeholder: its figure is supplied by the callback
                # bound to 'my-dropdown'.
                dcc.Graph(id='Hlow'),
                html.H1("Facebook Market Volume", style={'textAlign': 'center'}),
                dcc.Dropdown(id='my-dropdown2',
                             options=[{'label': 'Tesla', 'value': 'TSLA'}, {'label': 'Apple','value': 'AAPL'}, {'label': 'Facebook', 'value': 'FB'}, {'label': 'Microsoft','value': 'MSFT'}],
                             multi=True, value=['FB'],
                             style={"display": "block", "marginLeft": "auto",
                                    "marginRight": "auto", "width": "60%"}
                             ),
                # Graph placeholder: its figure is supplied by the callback
                # bound to 'my-dropdown2'.
                dcc.Graph(id='volume')
            ], className='container'),
        ])
    ])
])
# A Dash callback is a plain Python function wrapped by the app.callback
# decorator; it is invoked whenever the declared Input value changes.
@app.callback(Output('Hlow', 'figure'),
              [Input('my-dropdown', 'value')])
def update_graph(selected_dropdown):
    """Build the High/Low price figure for the selected stocks.

    Args:
        selected_dropdown: Ticker values chosen in 'my-dropdown'. A cleared
            selection (None or an empty list) yields a figure with no traces.

    Returns:
        A figure dict with 'data' (all High traces followed by all Low
        traces) and 'layout'.
    """
    dropdown = {"TSLA": "Tesla","AAPL": "Apple","FB": "Facebook","MSFT": "Microsoft",}
    # NOTE(review): a cleared multi-select may deliver None rather than an
    # empty list; normalise so the loop and the title do not raise.
    selected_dropdown = selected_dropdown or []
    high_traces = []
    low_traces = []
    for stock in selected_dropdown:
        # Filter once per stock instead of once per axis.
        stock_rows = df[df["Stock"] == stock]
        high_traces.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["High"],
                       mode='lines', opacity=0.7,
                       name=f'High {dropdown[stock]}',textposition='bottom center'))
        low_traces.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["Low"],
                       mode='lines', opacity=0.6,
                       name=f'Low {dropdown[stock]}',textposition='bottom center'))
    # Traces to display: highs first, then lows (same order as before).
    data = high_traces + low_traces
    figure = {'data': data,
              'layout': go.Layout(colorway=["#5E0DAC", '#FF4F00', '#375CB1','#FF7400', '#FFF400', '#FF0056'],
                                  height=600,
                                  title=f"High and Low Prices for {', '.join(str(dropdown[i]) for i in selected_dropdown)} Over Time",
                                  xaxis={"title":"Date",'rangeselector': {'buttons': [{'count': 1, 'label': '1M', 'step': 'month','stepmode': 'backward'},{'count': 6, 'label': '6M', 'step': 'month','stepmode': 'backward'},{'step': 'all'}]},'rangeslider': {'visible': True}, 'type': 'date'},
                                  yaxis={"title":"Price (USD)"})}
    return figure
# Dash callback for the second (volume) graph.
@app.callback(Output('volume', 'figure'),
              [Input('my-dropdown2', 'value')])
def update_graph(selected_dropdown_value):
    """Build the market-volume figure for the selected stocks.

    Args:
        selected_dropdown_value: Ticker values chosen in 'my-dropdown2'. A
            cleared selection (None or an empty list) yields a figure with
            no traces.

    Returns:
        A figure dict with 'data' (one Volume trace per stock) and 'layout'.
    """
    dropdown = {"TSLA": "Tesla","AAPL": "Apple","FB": "Facebook","MSFT": "Microsoft",}
    # NOTE(review): a cleared multi-select may deliver None rather than an
    # empty list; normalise so the loop and the title do not raise.
    selected_dropdown_value = selected_dropdown_value or []
    data = []
    for stock in selected_dropdown_value:
        # Filter once per stock instead of once per axis.
        stock_rows = df[df["Stock"] == stock]
        data.append(
            go.Scatter(x=stock_rows["Date"],
                       y=stock_rows["Volume"],
                       mode='lines', opacity=0.7,
                       name=f'Volume {dropdown[stock]}', textposition='bottom center'))
    figure = {'data': data,
              'layout': go.Layout(colorway=["#5E0DAC", '#FF4F00', '#375CB1', '#FF7400', '#FFF400', '#FF0056'],
                                  height=600,
                                  title=f"Market Volume for {', '.join(str(dropdown[i]) for i in selected_dropdown_value)} Over Time",
                                  xaxis={"title":"Date",'rangeselector': {'buttons': [{'count': 1,'label': '1M', 'step': 'month', 'stepmode': 'backward'}, {'count': 6, 'label': '6M','step': 'month','stepmode': 'backward'}, {'step': 'all'}]},'rangeslider': {'visible': True}, 'type': 'date'},
                                  yaxis={"title":"Transactions Volume"})}
    return figure
# Start the Dash development server (debug mode) when run as a script.
# The call must be indented under the guard; without indentation the
# `if` statement has no body and the file does not parse.
if __name__ == "__main__":
    app.run_server(debug=True)