LSTM Sliding-Window Forecasting (Part 1)

import math

import numpy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn import metrics
/work/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/work/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
# Fix the random seed for reproducibility
numpy.random.seed(7)

# Load the Tianchi power-consumption data, indexed by record_date
df = pd.read_csv("/work/johnson_folder/biggamesData/Tianchi_power.csv", index_col='record_date')
df.index = pd.to_datetime(df.index)
# Keep only the power_consumption series for user 1416
dataframe = pd.DataFrame(df[df['user_id']==1416].iloc[:,1])

dataset = dataframe.values
dataset = dataset.astype("float64")
print(dataframe.head())

# Outlier detection: draw a boxplot and print the flier (outlier) values
fig = plt.figure(1, figsize=(9,6))
ax = fig.add_subplot(111)
bp = ax.boxplot(dataset)
print(bp['fliers'][0].get_ydata())
# plt.show()
             power_consumption
record_date                   
2015-01-01              818609
2015-01-02              827350
2015-01-03              833673
2015-01-04              830581
2015-01-05              830087
[1.00000e+00 1.00000e+00 1.00000e+00 3.69883e+05 7.54680e+04 1.57942e+05
 4.68002e+05 1.37228e+05 6.29450e+05 4.08753e+05 4.92286e+05 5.63922e+05
 4.62405e+05 5.70325e+05 6.02682e+05 6.45633e+05 3.98241e+05 5.93963e+05
 6.08549e+05 5.44697e+05 2.25140e+05 2.17706e+05 4.58999e+05 5.33392e+05
 5.65148e+05 5.59500e+05 5.64756e+05 6.69684e+05 6.73104e+05 6.37631e+05
 6.10640e+05 5.75675e+05 5.83203e+05 5.72880e+05 5.31026e+05 5.23621e+05
 5.38329e+05 5.52407e+05 5.63819e+05 4.70434e+05 4.81114e+05 4.98298e+05
 4.99090e+05 5.11628e+05 5.18299e+05 5.42511e+05 5.44703e+05 6.06036e+05]

[Figure: boxplot of power_consumption used for outlier detection (output_2_1.png)]
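The flier values above include readings that are clearly corrupt (three values of 1.0). The post keeps them as-is; if you wanted to drop them before scaling, a minimal sketch (not in the original) using the same 1.5 × IQR rule that matplotlib's boxplot applies:

# Hypothetical cleanup step (not in the original post): remove rows
# outside the 1.5*IQR whiskers before scaling
q1, q3 = np.percentile(dataset, [25, 75])
iqr = q3 - q1
keep = (dataset[:, 0] >= q1 - 1.5 * iqr) & (dataset[:, 0] <= q3 + 1.5 * iqr)
dataset = dataset[keep]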

# Min-max scale the data into the [0, 1] range
# (note: fitting the scaler on the full series leaks test information;
#  strictly, it should be fit on the training split only)
scaler = MinMaxScaler(feature_range=(0,1))
dataset = scaler.fit_transform(dataset)
# Split into training and test sets (75% / 25%)
train_size = int(len(dataset)*0.75)
test_size = len(dataset)-train_size

print(test_size)

train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
160
# Build sliding time windows

def create_dataset(dataset, look_back=1):
    """Turn the series into (look_back window -> next value) pairs."""
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i+look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)


def create_predict(dataset, look_back=1):
    """Extract the trailing look_back window as the next model input."""
    dataX = []
    for i in range(len(dataset)-look_back):
        a = dataset[i+1:, 0]
        dataX.append(a)
    return numpy.array(dataX)
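As a quick sanity check (this demo is not in the original post), here is what the two helpers produce on a toy series. Note that the `-1` in `create_dataset`'s range means the last usable window is never emitted, and `create_predict` simply returns the trailing `look_back` values as the next model input:

toy = numpy.arange(8, dtype=float).reshape(-1, 1)
X, y = create_dataset(toy, look_back=3)
print(X)  # [[0. 1. 2.] [1. 2. 3.] [2. 3. 4.] [3. 4. 5.]]
print(y)  # [3. 4. 5. 6.]
print(create_predict(toy[-4:], look_back=3))  # [[5. 6. 7.]]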
look_back = 5

trainX,trainY = create_dataset(train,look_back)
testX,testY = create_dataset(test,look_back)
# Reshape into the [samples, time steps, features] layout Keras expects
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
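This reshape gives each sample a single time step with `look_back` features, matching `input_shape=(1, look_back)` below. A common alternative (a variation, not what this post does) is to feed the window as `look_back` time steps of one feature each:

# Alternative layout: look_back time steps, 1 feature per step
# (would replace the reshape above; the LSTM layer would then
#  use input_shape=(look_back, 1))
trainX_alt = numpy.reshape(trainX, (trainX.shape[0], look_back, 1))
testX_alt = numpy.reshape(testX, (testX.shape[0], look_back, 1))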
# Build a small LSTM model: 4 units followed by a single dense output
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=3, verbose=2)
WARNING:tensorflow:From /work/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /work/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 1s - loss: 0.2342
Epoch 2/100
 - 1s - loss: 0.0131
Epoch 3/100
 - 1s - loss: 0.0086
[... epochs 4-98 elided; the loss plateaus around 0.0080-0.0085 ...]
Epoch 99/100
 - 1s - loss: 0.0080
Epoch 100/100
 - 1s - loss: 0.0081





<keras.callbacks.History at 0x7f5cff997c50>
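The loss plateaus near 0.008 within roughly ten epochs, so most of the 100 epochs add little. A hedged sketch (not in the original) that stops training early using Keras's EarlyStopping callback:

from keras.callbacks import EarlyStopping

# Stop once the training loss stops improving for 5 consecutive epochs
early = EarlyStopping(monitor='loss', patience=5, verbose=1)
model.fit(trainX, trainY, epochs=100, batch_size=3, verbose=2,
          callbacks=[early])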
# Predict on the training and test windows
trainPredict = model.predict(trainX)
testPredict  = model.predict(testX)


# Convert predictions back to the original scale
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# # def rmsle(preds,dtrain):
# #     labels = dtrain.get_label()
# #     assert len(preds)==len(labels)
# #     labels = labels.tolist()
# #     preds = preds.tolist()
# #     terms_to_sum = [(math.log(labels[i]+1)-math.log(max(0,preds[i]))**2.0 for i,pred in enumerate(labels))]
# #     return 'rmsle',(sum(terms_to_sum)*(1.0/len(preds)))**0.5
# # print(rmsle(trainPredict[:,0],trainY[0]))
# # print(rmsle(testPredict[:,0],testY[0]))
# # def squared_log_error(pred, actual):
# #     return (np.log(pred + 1) - np.log(actual + 1))**2
# # true_vs_predicted = list(zip(trainPredict[:,0],trainY[0]))
# # rmsle = np.sqrt(true_vs_predicted.map(lambda t, p: squared_log_error(t, p)).mean())
# # print(rmsle)
# # Compute RMSLE (RMSE of log-transformed values)
# trainScore = math.sqrt(mean_squared_error(np.log(trainY[0]), np.log(trainPredict[:,0])))
# print('Train Score: %.2f RMSLE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(np.log(testY[0]), np.log(testPredict[:,0])))
# print('Test Score: %.2f RMSLE' % (testScore))
# mse = metrics.mean_squared_error(testY[0], testPredict[:,0])
# mae = metrics.mean_absolute_error(testY[0], testPredict[:,0])
# mean = testY[0].mean()
# # mse = metrics.mean_squared_error(trainY[0], trainPredict[:,0])
# # mae = metrics.mean_absolute_error(trainY[0], trainPredict[:,0])
# # mean = trainY[0].mean()
# per1 = mae/mean
# per2 = np.sqrt(mse)/mean
# print (per1,per2)
# import matplotlib.pyplot as plt
# import matplotlib
# zhfont1 = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc')
# plt.plot(list(trainY[0])+list(testY[0]),label = "train")
# plt.plot(list(trainPredict[:,0])+list(testPredict[:,0]),label = "test")
# plt.title("Electricity-consumption forecast for industry XX in city XX", fontproperties=zhfont1)
# plt.legend(loc=0)
# plt.show()
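For reference, here is a working version of the RMSE evaluation sketched in the commented-out code above; it assumes the inverse-transformed arrays from the previous cell:

# RMSE in original units, using the sklearn helper already imported
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test Score: %.2f RMSE' % testScore)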
# Rolling 30-step forecast: predict one step, append it to the (scaled)
# series, then slide the window forward and repeat
for i in range(30):
    p = dataset[-(look_back + 1):]              # trailing window (scaled)
    p1 = create_predict(p, look_back)           # shape (1, look_back)
    p2 = numpy.reshape(p1, (p1.shape[0], 1, p1.shape[1]))
    predict = model.predict(p2)
    dataset = numpy.vstack((dataset, predict))  # feed the prediction back in
    predict = scaler.inverse_transform(predict)
    print(predict)
[[1051844.5]]
[[1030572.3]]
[[1014005.94]]
[[1005629.8]]
[[999504.2]]
[[985769.9]]
[[974691.]]
[[964717.3]]
[[957068.1]]
[[951069.2]]
[[944732.44]]
[[938973.06]]
[[933648.4]]
[[929027.44]]
[[925098.94]]
[[921518.7]]
[[918299.8]]
[[915374.94]]
[[912762.6]]
[[910461.5]]
[[908408.4]]
[[906577.06]]
[[904934.2]]
[[903464.3]]
[[902156.]]
[[900989.56]]
[[899950.44]]
[[899022.5]]
[[898193.9]]
[[897454.94]]
References [1] and [2] discuss the problems with sliding-window LSTM forecasting. Window-based forecasting has some inherent challenges and limitations: when the data inside the sliding window does not follow a single trend, the forecast will not follow one either; but when the data in the window does follow a single trend, the forecast simply extrapolates that trend onward. This behavior is an artifact of the sliding window itself.

To address it, reference [2] proposes several remedies. Option 1 is series decomposition: model the trend component separately, then add (or multiply) the separately modeled components back together for the final forecast. Option 2 is to add exogenous variables derived from the time series. Option 3 is to weight the values inside each window, i.e. learn the relationship between within-window weights and the target value. Option 4 is to try Transformer-family models.

Reference [3] also describes the rolling-forecast procedure in detail: the input for the first predicted value is the last window-sized chunk of the training set; the forecast then rolls forward by repeatedly dropping the first element of the input array and appending the newly predicted value at the end.

In short, sliding-window LSTM forecasting has real pitfalls, but the remedies above can improve the results.

References:
[1][2][3] "[LSTM] Problems with sliding-window time-series forecasting -- the sliding window is a double-edged sword [continuously updated]", https://blog.youkuaiyun.com/qq_42658739/article/details/118928809
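As a hedged sketch of "Option 1" (series decomposition) above, one could split the series with statsmodels and model the trend separately. The use of `seasonal_decompose` and the weekly `period=7` here are my assumptions, not something the original demonstrates (older statsmodels versions call the argument `freq`):

from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose into trend + seasonal + residual (weekly period assumed)
result = seasonal_decompose(dataframe['power_consumption'], model='additive', period=7)
trend, seasonal, resid = result.trend, result.seasonal, result.resid
# Model the trend (e.g. with the LSTM above) and the residual separately,
# then add the components back together for the final forecast.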