Univariate Data Prediction Based on LSTM
Foreword
These notes record LSTM-based time-series prediction with a single input variable and a single output variable. Normalization is done by dividing each series by its maximum value.
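As a minimal sketch of this divide-by-maximum normalization (the variable names here are illustrative and do not appear in the script below):

import numpy as np

series = np.array([20., 50., 80., 100.])          # example raw values
series_normal = series / series.max()             # divide by the maximum -> [0.2, 0.5, 0.8, 1.0]
series_restored = series_normal * series.max()    # multiply back to recover the original units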
Usage Steps
1. Import the libraries
The code is as follows:
import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
2. LSTM model
The code is as follows:
class LstmRNN(nn.Module):
    """
    Parameters:
    - input_size: feature size
    - hidden_size: number of hidden units
    - output_size: number of outputs
    - num_layers: number of stacked LSTM layers
    """
    def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)  # utilize the LSTM module in torch.nn
        self.forwardCalculation = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        x, _ = self.lstm(_x)  # _x is the input, size (seq_len, batch, input_size)
        s, b, h = x.shape     # x is the LSTM output, size (seq_len, batch, hidden_size)
        x = x.view(s * b, h)
        x = self.forwardCalculation(x)
        x = x.view(s, b, -1)
        return x
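To make the tensor layout concrete, here is a small shape check (not part of the original script). Since nn.LSTM defaults to batch_first=False, the input is laid out as (seq_len, batch, input_size):

model = LstmRNN(input_size=1, hidden_size=16, output_size=1, num_layers=1)
dummy_x = torch.randn(10, 5, 1)   # (seq_len=10, batch=5, input_size=1)
dummy_y = model(dummy_x)          # output has shape (seq_len, batch, output_size)
print(dummy_y.shape)              # torch.Size([10, 5, 1])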
if __name__ == '__main__':
    # create dataset
    class DiabetesDataset(Dataset):
        def __init__(self, filepath):
            xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
            self.len = xy.shape[0]
            self.t = xy[:, 0]
            self.x_data = xy[:, 1]
            self.y_data = xy[:, 2]
            # the attributes above are stored as NumPy arrays

        def __len__(self):
            return self.len

    Data = DiabetesDataset('JT9Ddata.csv')
    data_len = Data.len
    t = Data.t
    dataset = np.zeros((data_len, 2))
    dataset[:, 0] = Data.x_data
    dataset[:, 1] = Data.y_data
    dataset = dataset.astype('float32')
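    # Assumed layout of JT9Ddata.csv (inferred from the indexing above and the plot
    # labels further down; the file itself is not included with these notes):
    #   column 0 -> time t, column 1 -> input signal (fuel-air ratio),
    #   column 2 -> output signal (speed)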
    # # plot the raw input and output data over time
    # plt.figure()
    # plt.plot(t, dataset[:, 0], label='l')
    # plt.plot(t, dataset[:, 1], label='h')
    # plt.xlabel('t')
    # plt.ylabel('LPSpeed/HPSpeed')
    # plt.legend(loc='upper right')
    # plt.show()
    # normalize the data (divide each column by its maximum value)
    in_max = dataset[:, 0].max()
    in_min = dataset[:, 0].min()
    out_max = dataset[:, 1].max()
    out_min = dataset[:, 1].min()
    dataset_normal = np.zeros((data_len, 2))
    dataset_normal[:, 0] = dataset[:, 0] / in_max
    dataset_normal[:, 1] = dataset[:, 1] / out_max
    dataset_normal = dataset_normal.astype('float32')
    # # plot the normalized input and output data over time
    # plt.figure()
    # plt.plot(t, dataset_normal[:, 0], label='in')
    # plt.plot(t, dataset_normal[:, 1], label='out')
    # plt.xlabel('t')
    # plt.ylabel('fuel/Speed')
    # plt.legend(loc='upper right')
    # plt.show()
    # split the data into training and test sets
    train_data_ratio = 0.7  # use 70% of the data for training, the remaining 30% for testing
    train_data_len = int(data_len * train_data_ratio)
    train_x = dataset_normal[:train_data_len, 0]
    train_y = dataset_normal[:train_data_len, 1]
    INPUT_FEATURES_NUM = 1
    OUTPUT_FEATURES_NUM = 1
    t_for_training = t[:train_data_len]

    test_x = dataset_normal[train_data_len:, 0]
    test_y = dataset_normal[train_data_len:, 1]
    t_for_testing = t[train_data_len:]
    # ----------------- train -------------------
    # reshape to (seq_len, batch, feature); nn.LSTM defaults to batch_first=False,
    # so the second dimension is the batch size (5 here); train_data_len must be divisible by 5
    train_x_tensor = train_x.reshape(-1, 5, INPUT_FEATURES_NUM)
    train_y_tensor = train_y.reshape(-1, 5, OUTPUT_FEATURES_NUM)
    # transfer the data to pytorch tensors
    train_x_tensor = torch.from_numpy(train_x_tensor)
    train_y_tensor = torch.from_numpy(train_y_tensor)
    # test_x_tensor = torch.from_numpy(test_x)

    lstm_model = LstmRNN(INPUT_FEATURES_NUM, 16, output_size=OUTPUT_FEATURES_NUM, num_layers=1)  # 16 hidden units
    print('LSTM model:', lstm_model)
    print('model.parameters:', lstm_model.parameters)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-2)

    max_epochs = 1500
    for epoch in range(max_epochs):
        output = lstm_model(train_x_tensor)
        loss = loss_function(output, train_y_tensor)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if loss.item() < 1e-4:
            print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss.item()))
            print("The loss value is reached")
            break
        elif (epoch + 1) % 100 == 0:
            print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss.item()))
    # prediction on the training dataset
    predictive_y_for_training = lstm_model(train_x_tensor)
    loss_for_training = loss_function(predictive_y_for_training, train_y_tensor)
    predictive_y_for_training = predictive_y_for_training.view(-1, OUTPUT_FEATURES_NUM).data.numpy()

    # torch.save(lstm_model.state_dict(), 'model_params.pkl')  # save the model parameters to a file
    # ----------------- test -------------------
    # lstm_model.load_state_dict(torch.load('model_params.pkl'))  # load the model parameters from a file
    lstm_model = lstm_model.eval()  # switch to evaluation mode

    # preprocess the test data: reshape to (seq_len, batch, feature) with the same batch size (5) as the training set
    test_x_tensor = test_x.reshape(-1, 5, INPUT_FEATURES_NUM)
    test_y_tensor = test_y.reshape(-1, 5, OUTPUT_FEATURES_NUM)
    test_x_tensor = torch.from_numpy(test_x_tensor)
    test_y_tensor = torch.from_numpy(test_y_tensor)

    predictive_y_for_testing = lstm_model(test_x_tensor)
    loss_for_testing = loss_function(predictive_y_for_testing, test_y_tensor)
    predictive_y_for_testing = predictive_y_for_testing.view(-1, OUTPUT_FEATURES_NUM).data.numpy()
Plotting (the display of the fuel-air ratio is not yet well tuned)
    # # plot the training-set output and the training-set predictions over time
    # plt.figure(1)
    # plt.plot(t[0:train_data_len], predictive_y_for_training * out_max, 'g', label='train-predict')
    # plt.plot(t[0:train_data_len], train_y * out_max, 'r', label='train')
    # plt.xlabel('t')
    # plt.ylabel('Speed')
    # plt.legend(loc='upper right')

    # # plot the test-set output and the test-set predictions over time
    # plt.figure(2)
    # plt.plot(t[train_data_len:], predictive_y_for_testing * out_max, 'g', label='test-predict')
    # plt.plot(t[train_data_len:], test_y * out_max, 'r', label='test')
    # # plt.plot(t[train_data_len:], dataset[train_data_len:, 1], 'b', label='yy')  # check: de-normalized output against the original output data
    # plt.xlabel('t')
    # plt.ylabel('speed')
    # plt.legend(loc='upper right')
    # plt.savefig(r'test.jpg', dpi=400, bbox_inches='tight')
    # plt.show()
    # ----------------- plot -------------------
    plt.figure()
    plt.plot(t[0:train_data_len], dataset[0:train_data_len, 0], 'g', label='Fuel air ratio')
    plt.plot(t[0:train_data_len], dataset[0:train_data_len, 1], 'b', label='train_speed')
    plt.plot(t[0:train_data_len], predictive_y_for_training * out_max, 'y--', label='train_pre_speed')
    plt.plot(t[train_data_len:], dataset[train_data_len:, 0], 'g')
    plt.plot(t[train_data_len:], dataset[train_data_len:, 1], 'k', label='test_speed')
    plt.plot(t[train_data_len:], predictive_y_for_testing * out_max, 'm--', label='test_pre_speed')
    plt.plot([t[train_data_len], t[train_data_len]], [-1, 8000], 'r--', label='separation line')  # vertical line separating train and test
    plt.xlabel('t')
    plt.ylabel('Fuel air ratio / speed')
    plt.legend(loc='lower left')  # upper right
    plt.show()
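As a small optional check that is not in the original script, the test error can also be reported back in physical units by undoing the divide-by-maximum normalization; this sketch reuses the variables defined above:

rmse_test = np.sqrt(np.mean((predictive_y_for_testing.flatten() * out_max - test_y * out_max) ** 2))
print('Test RMSE in original units:', rmse_test)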
Summary
Normalization method: divide each series by its maximum value
Optimizer: Adam
Loss function: MSELoss
Learning rate: 0.01
The fuel-air ratio (green line) does not fit the speed scale in the plot; a second y-axis should be added (see the sketch below).
The LSTM input train_x should be refined so that it better matches the engine's second-order model.
An accuracy measure on the training set is still missing.
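A minimal sketch of the second-y-axis fix suggested above, using matplotlib's twinx(); the data and labels follow the plotting code earlier, but this variant is not part of the original script:

fig, ax_ratio = plt.subplots()
ax_speed = ax_ratio.twinx()                                   # second y-axis sharing the same time axis
ax_ratio.plot(t, dataset[:, 0], 'g', label='Fuel air ratio')  # fuel-air ratio on the left axis
ax_speed.plot(t, dataset[:, 1], 'b', label='speed')           # speed on the right axis
ax_ratio.set_xlabel('t')
ax_ratio.set_ylabel('Fuel air ratio')
ax_speed.set_ylabel('speed')
fig.legend(loc='upper right')
plt.show()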
Output Results