AGDO-Transformer-LSTM Univariate Time Series Forecasting: A Comparative Analysis
This article presents a complete AGDO-Transformer-LSTM univariate time series forecasting model in MATLAB, together with a one-click comparison workflow that trains and evaluates four models side by side: LSTM, Transformer, GRU, and AGDO-Transformer-LSTM.
%% AGDO-Transformer-LSTM univariate time series forecasting: model comparison
clear; clc; close all;
%% Load data
% Electricity consumption time series. Note: MATLAB does not ship a built-in
% 'electricity_consumption.mat'; supply your own .mat file containing a
% single-column series stored in a variable named 'electricity'.
data = load('electricity_consumption.mat');
ts = data.electricity; % univariate time series (column vector)
time = (1:length(ts))';
% Visualize the raw data
figure('Position', [100, 100, 1000, 400], 'Color', 'w');
plot(time, ts, 'b-', 'LineWidth', 1.5);
title('Electricity Consumption Time Series', 'FontSize', 14);
xlabel('Time step', 'FontSize', 12);
ylabel('Consumption', 'FontSize', 12);
grid on;
set(gca, 'FontSize', 12);
saveas(gcf, 'raw_time_series.png');
%% Data preprocessing
% Min-max normalization (keep the min/max for denormalization later)
ts_min = min(ts);
ts_max = max(ts);
ts_normalized = (ts - ts_min) / (ts_max - ts_min);
% Split into training and test sets
train_ratio = 0.8;
n = length(ts_normalized);
n_train = floor(train_ratio * n);
train_data = ts_normalized(1:n_train);
test_data = ts_normalized(n_train+1:end);
% Build supervised input/output pairs
lookback = 24; % use 24 past steps to predict the next step
[X_train, Y_train] = create_timeseries_data(train_data, lookback);
[X_test, Y_test] = create_timeseries_data(test_data, lookback);
% Convert to the format trainNetwork expects for sequence-to-one regression:
% an N-by-1 cell array in which each cell is a numFeatures-by-seqLength
% sequence, plus an N-by-1 numeric response vector (Y is already N-by-1)
X_train = num2cell(X_train, 2);
X_test = num2cell(X_test, 2);
%% Model hyperparameters
input_size = 1;  % univariate input
output_size = 1; % one-step-ahead prediction
num_epochs = 50;
mini_batch_size = 32;
learning_rate = 0.001;
%% Train and evaluate the four models
models = {'LSTM', 'Transformer', 'GRU', 'AGDO-Transformer-LSTM'};
results = struct();
colors = lines(4); % one color per model
% Main overlay figure: actual series plus each model's test-set predictions.
% Keep explicit figure handles so the loop can switch between figures safely.
fig_main = figure('Position', [100, 100, 1200, 500], 'Color', 'w');
plot(time, ts, 'k-', 'LineWidth', 1.5); hold on;
% Per-model subplot figure
fig_tiles = figure('Position', [100, 100, 1200, 600], 'Color', 'w');
tiledlayout(2, 2, 'TileSpacing', 'compact', 'Padding', 'compact');
for i = 1:length(models)
model_name = models{i};
field_name = matlab.lang.makeValidName(model_name); % struct fields cannot contain '-'
fprintf('\n===== Training the %s model =====\n', model_name);
% Train the model
switch model_name
case 'LSTM'
net = train_lstm_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate);
case 'Transformer'
net = train_transformer_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate);
case 'GRU'
net = train_gru_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate);
case 'AGDO-Transformer-LSTM'
net = train_agdo_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate);
end
% Predict on the test set
Y_pred = predict(net, X_test);
Y_pred = double(Y_pred); % cast to double
% Denormalize
Y_test_real = Y_test * (ts_max - ts_min) + ts_min;
Y_pred_real = Y_pred * (ts_max - ts_min) + ts_min;
% Evaluate performance
[mse, rmse, mae, r2] = evaluate_performance(Y_test_real, Y_pred_real);
% Store results
results.(field_name).net = net;
results.(field_name).Y_pred = Y_pred_real;
results.(field_name).metrics = struct(...
'MSE', mse, 'RMSE', rmse, 'MAE', mae, 'R2', r2);
% Plot this model's predictions in its own tile
figure(fig_tiles);
nexttile;
plot(Y_test_real, 'b-', 'LineWidth', 1.5); hold on;
plot(Y_pred_real, 'r-', 'LineWidth', 1.2);
title(sprintf('%s predictions (R²=%.4f)', model_name, r2), 'FontSize', 12);
xlabel('Time step', 'FontSize', 10);
ylabel('Consumption', 'FontSize', 10);
legend('Actual', 'Predicted', 'Location', 'best');
grid on;
set(gca, 'FontSize', 10);
% Overlay the predictions on the main comparison figure; the first test
% target sits at global index n_train + lookback + 1
figure(fig_main);
plot(n_train+lookback+1:n_train+lookback+length(Y_pred_real), Y_pred_real, '-', 'LineWidth', 1.2, 'Color', colors(i,:));
end
% Finish the main comparison figure
figure(fig_main);
title('Prediction Comparison Across the Four Models', 'FontSize', 14);
xlabel('Time step', 'FontSize', 12);
ylabel('Consumption', 'FontSize', 12);
% Mark the train/test split (hidden from the legend)
xline(n_train, 'r--', 'LineWidth', 1.5, 'Label', 'Train/test split', ...
'LabelOrientation', 'horizontal', 'HandleVisibility', 'off');
legend([{'Actual data'}, models], 'Location', 'best', 'FontSize', 10);
grid on;
set(gca, 'FontSize', 12);
saveas(gcf, 'model_comparison.png');
%% Performance metric comparison
fprintf('\n===== Model performance comparison =====\n');
metric_matrix = zeros(length(models), 4);
for i = 1:length(models)
m = results.(matlab.lang.makeValidName(models{i})).metrics;
metric_matrix(i, :) = [m.MSE, m.RMSE, m.MAE, m.R2];
end
metrics_table = array2table(metric_matrix, ...
'VariableNames', {'MSE', 'RMSE', 'MAE', 'R2'}, 'RowNames', models);
disp(metrics_table);
% Bar charts of the metrics
figure('Position', [100, 100, 900, 600], 'Color', 'w');
metrics = {'MSE', 'RMSE', 'MAE', 'R2'};
tiledlayout(2, 2, 'TileSpacing', 'compact', 'Padding', 'compact');
for i = 1:length(metrics)
nexttile;
metric_values = zeros(1, length(models));
for j = 1:length(models)
metric_values(j) = results.(matlab.lang.makeValidName(models{j})).metrics.(metrics{i});
end
bar(metric_values, 'FaceColor', [0.6 0.8 1]);
set(gca, 'XTickLabel', models, 'FontSize', 10);
title(sprintf('%s comparison', metrics{i}), 'FontSize', 12);
grid on;
% Add a numeric label above each bar
for j = 1:length(metric_values)
text(j, metric_values(j), sprintf('%.4f', metric_values(j)), ...
'HorizontalAlignment', 'center', 'VerticalAlignment', 'bottom', ...
'FontSize', 10);
end
end
sgtitle('Model Performance Metrics', 'FontSize', 14);
saveas(gcf, 'metrics_comparison.png');
%% Helper functions
% Build a supervised dataset from a univariate series: each row of X holds
% `lookback` consecutive values, and Y holds the value that follows them.
function [X, Y] = create_timeseries_data(data, lookback)
n = length(data);
X = zeros(n - lookback, lookback);
Y = zeros(n - lookback, 1);
for i = 1:n - lookback
X(i, :) = data(i:i+lookback-1);
Y(i) = data(i+lookback);
end
end
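% Worked example (hypothetical values): with data = (1:6)' and lookback = 3,
% create_timeseries_data returns
%   X = [1 2 3; 2 3 4; 3 4 5]   and   Y = [4; 5; 6],
% i.e. each row of X is a 3-step window and Y is the step that follows it.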
% Train the LSTM model
function net = train_lstm_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate)
layers = [ ...
sequenceInputLayer(input_size)
lstmLayer(128, 'OutputMode', 'last')
fullyConnectedLayer(64)
reluLayer()
fullyConnectedLayer(output_size)
regressionLayer()];
options = trainingOptions('adam', ...
'MaxEpochs', num_epochs, ...
'MiniBatchSize', mini_batch_size, ...
'InitialLearnRate', learning_rate, ...
'GradientThreshold', 1, ...
'Shuffle', 'every-epoch', ...
'Plots', 'none', ...
'Verbose', 0);
net = trainNetwork(X_train, Y_train, layers, options);
end
% Train the Transformer model. Note: MATLAB's Deep Learning Toolbox has no
% transformerEncoderLayer; the encoder block below is assembled from
% selfAttentionLayer (R2023a+), layer normalization, and a position-wise
% feed-forward layer instead.
function net = train_transformer_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate)
num_heads = 4;
num_encoders = 2;
d_model = 64; % model width after the input projection
encoder_blocks = [];
for k = 1:num_encoders
encoder_blocks = [encoder_blocks
selfAttentionLayer(num_heads, d_model) % multi-head self-attention
layerNormalizationLayer
fullyConnectedLayer(d_model)           % position-wise feed-forward
reluLayer
layerNormalizationLayer]; %#ok<AGROW>
end
layers = [
sequenceInputLayer(input_size)
fullyConnectedLayer(d_model)   % project the scalar input to d_model channels
positionEncodingLayer(d_model) % custom sinusoidal position encoding (defined below)
encoder_blocks
globalAveragePooling1dLayer    % pool over time for sequence-to-one output
fullyConnectedLayer(64)
reluLayer()
fullyConnectedLayer(output_size)
regressionLayer()
];
options = trainingOptions('adam', ...
'MaxEpochs', num_epochs, ...
'MiniBatchSize', mini_batch_size, ...
'InitialLearnRate', learning_rate, ...
'GradientThreshold', 1, ...
'Shuffle', 'every-epoch', ...
'Plots', 'none', ...
'Verbose', 0);
net = trainNetwork(X_train, Y_train, layers, options);
end
% Train the GRU model
function net = train_gru_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate)
layers = [ ...
sequenceInputLayer(input_size)
gruLayer(128, 'OutputMode', 'last')
fullyConnectedLayer(64)
reluLayer()
fullyConnectedLayer(output_size)
regressionLayer()];
options = trainingOptions('adam', ...
'MaxEpochs', num_epochs, ...
'MiniBatchSize', mini_batch_size, ...
'InitialLearnRate', learning_rate, ...
'GradientThreshold', 1, ...
'Shuffle', 'every-epoch', ...
'Plots', 'none', ...
'Verbose', 0);
net = trainNetwork(X_train, Y_train, layers, options);
end
% Train the AGDO-Transformer-LSTM hybrid model: a Transformer encoder block
% captures global context, an LSTM models local temporal dynamics, and a
% parameter-free attention pooling layer (defined below) weights the steps.
function net = train_agdo_model(X_train, Y_train, input_size, output_size, num_epochs, mini_batch_size, learning_rate)
num_heads = 4;
d_model = 64;
layers = [
sequenceInputLayer(input_size)
fullyConnectedLayer(d_model)   % project the scalar input to d_model channels
positionEncodingLayer(d_model) % custom sinusoidal position encoding
% Transformer encoder block (selfAttentionLayer requires R2023a+)
selfAttentionLayer(num_heads, d_model)
layerNormalizationLayer
fullyConnectedLayer(d_model)
reluLayer
layerNormalizationLayer
% LSTM block: return the full sequence so attention can pool over it
lstmLayer(128, 'OutputMode', 'sequence')
% Attention pooling over the time dimension
attentionPoolingLayer('attention_pool')
fullyConnectedLayer(64)
reluLayer()
fullyConnectedLayer(output_size)
regressionLayer()
];
options = trainingOptions('adam', ...
'MaxEpochs', num_epochs, ...
'MiniBatchSize', mini_batch_size, ...
'InitialLearnRate', learning_rate, ...
'GradientThreshold', 1, ...
'Shuffle', 'every-epoch', ...
'Plots', 'none', ...
'Verbose', 0);
net = trainNetwork(X_train, Y_train, layers, options);
end
% Custom sinusoidal position encoding layer. The layer is Formattable so it
% can read the 'CBT' (channel x batch x time) format of sequence data, and it
% derives the encoding from the actual sequence length, so no maximum
% position has to be fixed in advance.
function layer = positionEncodingLayer(num_channels)
layer = functionLayer(@(X) add_position_encoding(X, num_channels), ...
'Formattable', true, 'Name', 'position_encoding');
end
function Y = add_position_encoding(X, num_channels)
% X is a formatted dlarray with dimensions 'CBT'
seq_len = size(X, 3);
position = 0:seq_len-1;
% One frequency per sin/cos pair, geometrically spaced (Vaswani et al., 2017)
angle_rates = 1 ./ (10000 .^ (2 * (0:floor((num_channels-1)/2)) / num_channels));
angle_rads = angle_rates(:) * position; % num_rates x seq_len
% Interleave sine (odd channels) and cosine (even channels)
pe = zeros(num_channels, seq_len);
pe(1:2:end, :) = sin(angle_rads);
pe(2:2:end, :) = cos(angle_rads(1:floor(num_channels/2), :));
% Broadcast over the batch dimension and add to the input
Y = X + single(reshape(pe, num_channels, 1, seq_len));
end
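% Worked example (hypothetical values): with num_channels = 4 and a sequence
% of length 3, the encoding at step t (t = 0, 1, 2) is
%   pe(:, t+1) = [sin(t); cos(t); sin(t/100); cos(t/100)],
% i.e. alternating sine/cosine channels at geometrically spaced frequencies.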
% Custom attention pooling layer. A functionLayer cannot carry learnable
% weights (that would require a custom layer class), so this is a
% parameter-free, content-based attention: each time step is scored against
% the mean step, and the softmax-weighted sum over time is returned.
function layer = attentionPoolingLayer(name)
layer = functionLayer(@attention_pooling, 'Formattable', true, 'Name', name);
end
function Y = attention_pooling(X)
% X is a formatted dlarray with dimensions 'CBT'
context = mean(X, 3);          % mean step used as the query, C x B
scores = sum(X .* context, 1); % dot-product score per step, 1 x B x T
% Numerically stable softmax over the time dimension
w = exp(scores - max(scores, [], 3));
w = w ./ sum(w, 3);
% Attention-weighted sum over time, then drop the time dimension
Z = sum(X .* w, 3);            % C x B x 1
Y = dlarray(reshape(stripdims(Z), size(Z, 1), size(Z, 2)), 'CB');
end
% Performance evaluation: MSE, RMSE, MAE, and the coefficient of determination R²
function [mse, rmse, mae, r2] = evaluate_performance(Y_real, Y_pred)
mse = mean((Y_real - Y_pred).^2);
rmse = sqrt(mse);
mae = mean(abs(Y_real - Y_pred));
r2 = 1 - sum((Y_real - Y_pred).^2) / sum((Y_real - mean(Y_real)).^2);
fprintf('MSE: %.4f, RMSE: %.4f\n', mse, rmse);
fprintf('MAE: %.4f, R²: %.4f\n\n', mae, r2);
end
Algorithm Notes
1. Model architecture
AGDO-Transformer-LSTM model
- Position encoding layer: injects position information into the input sequence
- Transformer encoder: captures long-range dependencies in the sequence
- LSTM layer: learns the local temporal dynamics of the series
- Attention mechanism: focuses on the most informative time steps
- Fully connected output head: produces the final prediction
Baseline models
- LSTM: standard long short-term memory network
- Transformer: self-attention-based model
- GRU: gated recurrent unit, a lighter variant of the LSTM
2. Key design points
- Position encoding: compensates for self-attention's inherent insensitivity to step order
- Hybrid architecture: combines the Transformer's global context with the LSTM's sequential modeling
- Attention mechanism: dynamically weights the most informative time steps to improve accuracy
- AGDO optimization: an adaptive gradient descent optimizer; this implementation substitutes Adam (see the sketch below)
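AGDO itself is not implemented here. As a hedged stand-in, adaptive behavior can be approximated by combining Adam with a piecewise learning-rate decay in trainingOptions; the drop factor and period below are illustrative assumptions, not tuned values:
% Sketch: Adam plus a stepwise learning-rate decay as a stand-in for AGDO
options = trainingOptions('adam', ...
'MaxEpochs', 50, ...
'MiniBatchSize', 32, ...
'InitialLearnRate', 1e-3, ...
'LearnRateSchedule', 'piecewise', ... % decay the learning rate in steps
'LearnRateDropFactor', 0.5, ...       % halve the rate ...
'LearnRateDropPeriod', 10, ...        % ... every 10 epochs
'GradientThreshold', 1, ...
'Shuffle', 'every-epoch');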
3. Features
- One-click comparison: trains and evaluates all four forecasting models in a single run
- Comprehensive evaluation: reports MSE, RMSE, MAE, and R² (defined after this list)
- Visual analysis:
  - raw time series plot
  - per-model prediction plots
  - bar charts of the performance metrics
  - combined prediction overlay
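For reference, the four metrics computed by evaluate_performance are:
\[
\mathrm{MSE}=\frac{1}{N}\sum_{i=1}^{N}\bigl(y_i-\hat{y}_i\bigr)^2,\qquad
\mathrm{RMSE}=\sqrt{\mathrm{MSE}},\qquad
\mathrm{MAE}=\frac{1}{N}\sum_{i=1}^{N}\bigl|y_i-\hat{y}_i\bigr|,\qquad
R^2=1-\frac{\sum_{i=1}^{N}(y_i-\hat{y}_i)^2}{\sum_{i=1}^{N}(y_i-\bar{y})^2}
\]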
4. Usage
Running the script will automatically:
- load the electricity consumption dataset
- preprocess the data (normalization, train/test split)
- train the four forecasting models
- generate predictions and performance metrics
- save the visualization images
Tunable parameters (see the example below):
- lookback: history window length (default 24)
- num_epochs: number of training epochs (default 50)
- mini_batch_size: mini-batch size (default 32)
- learning_rate: learning rate (default 0.001)
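For example, to trade training time for a longer history window, change the defaults near the top of the script; the values below are illustrative, not tuned:
lookback = 48;        % look further back (e.g., two days of hourly data)
num_epochs = 100;     % train longer
mini_batch_size = 64;
learning_rate = 5e-4; % smaller step size for the longer run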
5. Outputs
Image files:
- raw_time_series.png
- model_comparison.png
- metrics_comparison.png
Console output:
- per-model training progress
- a detailed performance comparison table
- each model's accuracy metrics (R², etc.)
Performance advantages
The AGDO-Transformer-LSTM model aims to improve forecasting accuracy through:
- Multi-scale feature extraction: the Transformer captures long-range dependencies while the LSTM learns short-term patterns
- Attention focusing: dynamic weighting of the most informative time steps
- Position awareness: position encoding strengthens the model's sense of temporal order
- Architectural hybridization: combines the strengths of several network types
This implementation provides a one-click comparison framework for evaluating time series forecasting models side by side, and is particularly suited to research and applications involving univariate series.