Univariate Data Prediction Based on LSTM
Foreword
These notes record LSTM-based time-series prediction with a single input variable and a single output variable. Normalization is done by dividing each series by its maximum value.
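As a minimal sketch of this divide-by-maximum normalization (the variable names here are illustrative and do not appear in the script below):

import numpy as np

series = np.array([20., 50., 80., 100.])          # example raw values
series_normal = series / series.max()             # divide by the maximum -> [0.2, 0.5, 0.8, 1.0]
series_restored = series_normal * series.max()    # multiply back to recover the original units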
Usage Steps
1. Import the libraries
The code is as follows:
import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
2. LSTM model
The code is as follows:
class LstmRNN(nn.Module):
    """
    Parameters:
    - input_size: feature size
    - hidden_size: number of hidden units
    - output_size: number of outputs
    - num_layers: number of stacked LSTM layers
    """
    def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)  # utilize the LSTM module in torch.nn
        self.forwardCalculation = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        x, _ = self.lstm(_x)  # _x is the input, size (seq_len, batch, input_size)
        s, b, h = x.shape     # x is the LSTM output, size (seq_len, batch, hidden_size)
        x = x.view(s * b, h)
        x = self.forwardCalculation(x)
        x = x.view(s, b, -1)
        return x
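To make the tensor layout concrete, here is a small shape check (not part of the original script). Since nn.LSTM defaults to batch_first=False, the input is laid out as (seq_len, batch, input_size):

model = LstmRNN(input_size=1, hidden_size=16, output_size=1, num_layers=1)
dummy_x = torch.randn(10, 5, 1)   # (seq_len=10, batch=5, input_size=1)
dummy_y = model(dummy_x)          # output has shape (seq_len, batch, output_size)
print(dummy_y.shape)              # torch.Size([10, 5, 1])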
if __name__ == '__main__':
    # create dataset
    class DiabetesDataset(Dataset):
        def __init__(self, filepath):
            xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
            self.len = xy.shape[0]
            self.t = xy[:, 0]
            self.x_data = xy[:, 1]
            self.y_data = xy[:, 2]
            # the attributes above are stored as NumPy arrays

        def __len__(self):
            return self.len

    Data = DiabetesDataset('JT9Ddata.csv')
    data_len = Data.len
    t = Data.t
    dataset = np.zeros((data_len, 2))
    dataset[:, 0] = Data.x_data
    dataset[:, 1] = Data.y_data
    dataset = dataset.astype('float32')
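    # Assumed layout of JT9Ddata.csv (inferred from the indexing above and the plot
    # labels further down; the file itself is not included with these notes):
    #   column 0 -> time t, column 1 -> input signal (fuel-air ratio),
    #   column 2 -> output signal (speed)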
    # # plot the raw input and output data over time
    # plt.figure()
    # plt.plot(t, dataset[:, 0], label='l')
    # plt.plot(t, dataset[:, 1], label='h')
    # plt.xlabel('t')
    # plt.ylabel('LPSpeed/HPSpeed')
    # plt.legend(loc='upper right')
    # plt.show()
    # normalize the data (divide each column by its maximum value)
    in_max = dataset[:, 0].max()
    in_min = dataset[:, 0].min()
    out_max = dataset[:, 1].max()
    out_min = dataset[:, 1].min()
    dataset_normal = np.zeros((data_len, 2))
    dataset_normal[:, 0] = dataset[:, 0] / in_max
    dataset_normal[:, 1] = dataset[:, 1] / out_max
    dataset_normal = dataset_normal.astype('float32')
    # # plot the normalized input and output data over time
    # plt.figure()
    # plt.plot(t, dataset_normal[:, 0], label='in')
    # plt.plot(t, dataset_normal[:, 1], label='out')
    # plt.xlabel('t')
    # plt.ylabel('fuel/Speed')
    # plt.legend(loc='upper right')
    # plt.show()
    # split the data into training and test sets
    train_data_ratio = 0.7  # use 70% of the data for training, the remaining 30% for testing
    train_data_len = int(data_len * train_data_ratio)
    train_x = dataset_normal[:train_data_len, 0]
    train_y = dataset_normal[:train_data_len, 1]
    INPUT_FEATURES_NUM = 1
    OUTPUT_FEATURES_NUM = 1
    t_for_training = t[:train_data_len]

    test_x = dataset_normal[train_data_len:, 0]
    test_y = dataset_normal[train_data_len:, 1]
    t_for_testing = t[train_data_len:]
    # ----------------- train -------------------
    # reshape to (seq_len, batch, feature); nn.LSTM defaults to batch_first=False,
    # so the second dimension is the batch size (5 here); train_data_len must be divisible by 5
    train_x_tensor = train_x.reshape(-1, 5, INPUT_FEATURES_NUM)
    train_y_tensor = train_y.reshape(-1, 5, OUTPUT_FEATURES_NUM)
    # transfer the data to pytorch tensors
    train_x_tensor = torch.from_numpy(train_x_tensor)
    train_y_tensor = torch.from_numpy(train_y_tensor)
    # test_x_tensor = torch.from_numpy(test_x)

    lstm_model = LstmRNN(INPUT_FEATURES_NUM, 16, output_size=OUTPUT_FEATURES_NUM, num_layers=1)  # 16 hidden units
    print('LSTM model:', lstm_model)
    print('model.parameters:', lstm_model.parameters)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-2)

    max_epochs = 1500
    for epoch in range(max_epochs):
        output = lstm_model(train_x_tensor)
        loss = loss_function(output, train_y_tensor)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if loss.item() < 1e-4:
            print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss.item()))
            print("The loss value is reached")
            break
        elif (epoch + 1) % 100 == 0:
            print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss.item()))
    # prediction on the training dataset
    predictive_y_for_training = lstm_model(train_x_tensor)
    loss_for_training = loss_function(predictive_y_for_training, train_y_tensor)
    predictive_y_for_training = predictive_y_for_training.view(-1, OUTPUT_FEATURES_NUM).data.numpy()

    # torch.save(lstm_model.state_dict(), 'model_params.pkl')  # save the model parameters to a file
    # ----------------- test -------------------
    # lstm_model.load_state_dict(torch.load('model_params.pkl'))  # load the model parameters from a file
    lstm_model = lstm_model.eval()  # switch to evaluation mode

    # preprocess the test data: reshape to (seq_len, batch, feature) with the same batch size (5) as the training set
    test_x_tensor = test_x.reshape(-1, 5, INPUT_FEATURES_NUM)
    test_y_tensor = test_y.reshape(-1, 5, OUTPUT_FEATURES_NUM)
    test_x_tensor = torch.from_numpy(test_x_tensor)
    test_y_tensor = torch.from_numpy(test_y_tensor)

    predictive_y_for_testing = lstm_model(test_x_tensor)
    loss_for_testing = loss_function(predictive_y_for_testing, test_y_tensor)
    predictive_y_for_testing = predictive_y_for_testing.view(-1, OUTPUT_FEATURES_NUM).data.numpy()
Plotting (the display of the fuel-air ratio is not yet well tuned)
    # # plot the training-set output and the training-set predictions over time
    # plt.figure(1)
    # plt.plot(t[0:train_data_len], predictive_y_for_training * out_max, 'g', label='train-predict')
    # plt.plot(t[0:train_data_len], train_y * out_max, 'r', label='train')
    # plt.xlabel('t')
    # plt.ylabel('Speed')
    # plt.legend(loc='upper right')

    # # plot the test-set output and the test-set predictions over time
    # plt.figure(2)
    # plt.plot(t[train_data_len:], predictive_y_for_testing * out_max, 'g', label='test-predict')
    # plt.plot(t[train_data_len:], test_y * out_max, 'r', label='test')
    # # plt.plot(t[train_data_len:], dataset[train_data_len:, 1], 'b', label='yy')  # check: de-normalized output against the original output data
    # plt.xlabel('t')
    # plt.ylabel('speed')
    # plt.legend(loc='upper right')
    # plt.savefig(r'test.jpg', dpi=400, bbox_inches='tight')
    # plt.show()
    # ----------------- plot -------------------
    plt.figure()
    plt.plot(t[0:train_data_len], dataset[0:train_data_len, 0], 'g', label='Fuel air ratio')
    plt.plot(t[0:train_data_len], dataset[0:train_data_len, 1], 'b', label='train_speed')
    plt.plot(t[0:train_data_len], predictive_y_for_training * out_max, 'y--', label='train_pre_speed')
    plt.plot(t[train_data_len:], dataset[train_data_len:, 0], 'g')
    plt.plot(t[train_data_len:], dataset[train_data_len:, 1], 'k', label='test_speed')
    plt.plot(t[train_data_len:], predictive_y_for_testing * out_max, 'm--', label='test_pre_speed')
    plt.plot([t[train_data_len], t[train_data_len]], [-1, 8000], 'r--', label='separation line')  # vertical line separating train and test
    plt.xlabel('t')
    plt.ylabel('Fuel air ratio / speed')
    plt.legend(loc='lower left')  # upper right
    plt.show()
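As a small optional check that is not in the original script, the test error can also be reported back in physical units by undoing the divide-by-maximum normalization; this sketch reuses the variables defined above:

rmse_test = np.sqrt(np.mean((predictive_y_for_testing.flatten() * out_max - test_y * out_max) ** 2))
print('Test RMSE in original units:', rmse_test)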
Summary
Normalization method: divide each series by its maximum value
Optimizer: Adam
Loss function: MSELoss
Learning rate: 0.01
The fuel-air ratio (green line) does not fit the speed scale in the plot; a second y-axis should be added (see the sketch below).
The LSTM input train_x should be refined so that it better matches the engine's second-order model.
An accuracy measure on the training set is still missing.
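A minimal sketch of the second-y-axis fix suggested above, using matplotlib's twinx(); the data and labels follow the plotting code earlier, but this variant is not part of the original script:

fig, ax_ratio = plt.subplots()
ax_speed = ax_ratio.twinx()                                   # second y-axis sharing the same time axis
ax_ratio.plot(t, dataset[:, 0], 'g', label='Fuel air ratio')  # fuel-air ratio on the left axis
ax_speed.plot(t, dataset[:, 1], 'b', label='speed')           # speed on the right axis
ax_ratio.set_xlabel('t')
ax_ratio.set_ylabel('Fuel air ratio')
ax_speed.set_ylabel('speed')
fig.legend(loc='upper right')
plt.show()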
Output Results