Run screenshot: (screenshot not reproduced here)

Data: (preview of the input CSV not reproduced here)
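The original data preview is an image and is not reproduced. Judging from how main() reads the file, 'Abilene-OD_pair.csv' is expected to contain a 'time' column followed by one column per OD pair. The snippet below is a minimal sketch for generating a synthetic file with that layout so the script can be smoke-tested; the number of OD pairs, the value ranges, and the 5-minute sampling interval are assumptions, not properties of the real dataset.

import numpy as np
import pandas as pd

# Hypothetical stand-in for 'Abilene-OD_pair.csv' (layout inferred from main();
# sizes, column names, and sampling interval are assumptions for testing only).
num_steps, num_pairs = 2000, 144
rng = np.random.default_rng(0)
t = np.arange(num_steps)[:, None]
flows = 100 + 50 * np.sin(2 * np.pi * t / 288) + rng.normal(0, 10, (num_steps, num_pairs))
flows = np.clip(flows, 0, None)  # traffic volumes are non-negative
df = pd.DataFrame(flows, columns=[f"OD_{i}" for i in range(num_pairs)])
df.insert(0, "time", pd.date_range("2004-03-01", periods=num_steps, freq="5min"))
df.to_csv("Abilene-OD_pair.csv", index=False)

The full training script follows.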
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch_geometric.nn import GATv2Conv
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

# Font setting: SimHei renders CJK characters in plots (harmless with English labels)
plt.rcParams["font.family"] = ["SimHei"]
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed()

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Configuration
class Config:
    def __init__(self):
        self.window_size = 24      # input window size (length of history)
        self.pred_size = 12        # prediction window size (forecast horizon)
        self.batch_size = 128      # batch size
        self.epochs = 20           # number of training epochs
        self.learning_rate = 0.01  # learning rate
        self.hidden_dim = 64       # hidden dimension
        self.num_heads = 4         # number of attention heads
        self.num_layers = 2        # number of GNN layers
        self.dropout = 0.2         # dropout rate
        self.train_ratio = 0.7     # training split ratio
        self.val_ratio = 0.15      # validation split ratio
        self.test_ratio = 0.15     # test split ratio
# Data loading and preprocessing
class TrafficDataset(Dataset):
    def __init__(self, data, window_size, pred_size):
        """
        Traffic flow dataset.
        data: standardized flow data of shape [time_steps, num_nodes]
        window_size: input window size
        pred_size: prediction window size
        """
        self.data = data
        self.window_size = window_size
        self.pred_size = pred_size

    def __len__(self):
        return len(self.data) - self.window_size - self.pred_size + 1

    def __getitem__(self, idx):
        # Input: [window_size, num_nodes]
        x = self.data[idx:idx + self.window_size]
        # Target: [pred_size, num_nodes]
        y = self.data[idx + self.window_size:idx + self.window_size + self.pred_size]
        return torch.FloatTensor(x), torch.FloatTensor(y)
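# Worked example of the sliding window (values follow the Config defaults above):
# with window_size=24 and pred_size=12, sample idx uses rows [idx, idx+24) as input
# and rows [idx+24, idx+36) as target, so a series of T steps yields T - 35 samples.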
# Build the graph structure (based on correlations between OD pairs)
def build_od_graph(data, threshold=0.6):
    """
    Build the graph from correlations between OD pairs.
    data: flow data of shape [time_steps, num_nodes]
    threshold: correlation threshold
    """
    # Correlation between nodes
    corr_matrix = np.corrcoef(data.T)  # [num_nodes, num_nodes]
    edge_index = []
    # Add edges whose absolute correlation exceeds the threshold
    for i in range(corr_matrix.shape[0]):
        for j in range(i + 1, corr_matrix.shape[1]):
            if abs(corr_matrix[i, j]) > threshold:
                edge_index.append([i, j])
                edge_index.append([j, i])  # undirected graph
    # If there are too few edges, fall back to a minimum spanning tree to keep the graph connected
    if len(edge_index) < corr_matrix.shape[0]:
        print(f"Correlation threshold {threshold} is too high, not enough edges; building a minimum spanning tree...")
        from scipy.sparse.csgraph import minimum_spanning_tree
        from scipy.sparse import csr_matrix
        # Convert correlations to distances
        dist_matrix = 1 - np.abs(corr_matrix)
        mst = minimum_spanning_tree(csr_matrix(dist_matrix))
        mst = mst.toarray()
        for i in range(mst.shape[0]):
            for j in range(mst.shape[1]):
                if mst[i, j] != 0:
                    edge_index.append([i, j])
                    edge_index.append([j, i])
    # Convert to a PyTorch tensor and transpose to [2, num_edges]
    edge_index = torch.LongTensor(edge_index).t().contiguous()
    print(f"The graph contains {edge_index.shape[1] // 2} edges")
    return edge_index
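# Example: with the default threshold of 0.6, a pair of OD series whose absolute
# correlation is, say, 0.72 contributes both directed edges [i, j] and [j, i], so the
# GATv2 layers can pass messages in both directions along that connection.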
# GNN-Transformer model
class GNNTransformer(nn.Module):
    def __init__(self, config):
        super(GNNTransformer, self).__init__()
        self.config = config
        # GNN layers
        self.gnn_layers = nn.ModuleList()
        # Input layer: window_size -> hidden_dim * num_heads
        self.gnn_layers.append(
            GATv2Conv(config.window_size, config.hidden_dim,
                      heads=config.num_heads, dropout=config.dropout)
        )
        # Remaining GNN layers
        for _ in range(config.num_layers - 1):
            self.gnn_layers.append(
                GATv2Conv(config.hidden_dim * config.num_heads, config.hidden_dim,
                          heads=config.num_heads, dropout=config.dropout)
            )
        # Transformer encoder layer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_dim * config.num_heads,
            nhead=config.num_heads,
            dim_feedforward=config.hidden_dim * 4,
            dropout=config.dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
        # Output layer: map the hidden dimension to the prediction horizon
        self.output_layer = nn.Sequential(
            nn.Linear(config.hidden_dim * config.num_heads, config.hidden_dim),
            nn.ReLU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.hidden_dim, config.pred_size)
        )

    def forward(self, x, edge_index, batch=None):
        """
        x: input features of shape [batch_size * num_nodes, window_size]
        edge_index: edge index of a single graph, shape [2, num_edges]
        batch: graph index of each node, shape [batch_size * num_nodes]
        """
        batch_size = len(torch.unique(batch)) if batch is not None else 1
        num_nodes = self.config.num_nodes
        # Replicate the single-graph edges for every sample in the batch, shifting the
        # node indices so that message passing stays inside each sample's own graph
        if batch_size > 1:
            edge_index = torch.cat([edge_index + i * num_nodes for i in range(batch_size)], dim=1)
        # GNN layers
        for i, gnn_layer in enumerate(self.gnn_layers):
            x = gnn_layer(x, edge_index)
            if i < len(self.gnn_layers) - 1:
                x = F.elu(x)
                x = F.dropout(x, p=self.config.dropout, training=self.training)
        # Reshape to [batch_size, num_nodes, hidden_dim * num_heads]
        x = x.view(batch_size, num_nodes, -1)
        # Transformer over the node dimension
        x = self.transformer_encoder(x)
        # Output layer: predict the next pred_size time steps
        x = self.output_layer(x)  # [batch_size, num_nodes, pred_size]
        # Transpose to [batch_size, pred_size, num_nodes] to match the target shape
        return x.permute(0, 2, 1)
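# Tensor shapes through the model with the default Config (hidden_dim=64, num_heads=4):
#   input            [batch*num_nodes, 24]    (flattened per-node history windows)
#   GATv2 layers  -> [batch*num_nodes, 256]   (64 channels x 4 concatenated heads)
#   reshape       -> [batch, num_nodes, 256]
#   Transformer   -> [batch, num_nodes, 256]  (self-attention across the OD pairs)
#   output MLP    -> [batch, num_nodes, 12]
#   permute       -> [batch, 12, num_nodes]   (matches the target layout)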
# Training function
def train_model(model, train_loader, val_loader, edge_index, optimizer, criterion, config, device):
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_model = None
    edge_index = edge_index.to(device)
    for epoch in range(config.epochs):
        # Training phase
        model.train()
        train_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)  # [batch, window, nodes]
            # Reshape the data for the GNN input
            batch_size, window_size, num_nodes = data.shape
            data = data.permute(0, 2, 1).reshape(-1, window_size)  # [batch*nodes, window]
            # Batch index: which sample each node row belongs to
            batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)
            optimizer.zero_grad()
            output = model(data, edge_index, batch_indices)  # [batch, pred, nodes]
            loss = criterion(output, target)
            loss.backward()
            # Gradient clipping to avoid exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_loss += loss.item()
            # Print progress every 10 batches
            if (batch_idx + 1) % 10 == 0:
                print(
                    f'Epoch {epoch + 1}/{config.epochs}, Batch {batch_idx + 1}/{len(train_loader)}, Loss: {loss.item():.6f}')
        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                batch_size, window_size, num_nodes = data.shape
                data = data.permute(0, 2, 1).reshape(-1, window_size)
                batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)
                output = model(data, edge_index, batch_indices)
                loss = criterion(output, target)
                val_loss += loss.item()
        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        # Keep the best model (lowest validation loss)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # Clone the tensors so later optimizer steps do not overwrite the snapshot
            best_model = {k: v.detach().clone() for k, v in model.state_dict().items()}
            torch.save(best_model, 'best_model.pth')
        print(f'Epoch {epoch + 1}/{config.epochs}, train loss: {avg_train_loss:.6f}, val loss: {avg_val_loss:.6f}')
    # Load the best model
    model.load_state_dict(best_model)
    return model, train_losses, val_losses
# Inverse standardization
def inverse_transform(data, mean, std):
    """
    Undo the standardization.
    data: standardized data of shape [batch, pred, nodes]
    mean: per-node means, shape [nodes]
    std: per-node standard deviations, shape [nodes]
    """
    # Reshape mean and std to [1, 1, nodes] so they broadcast against data
    mean_reshaped = mean.reshape(1, 1, -1)
    std_reshaped = std.reshape(1, 1, -1)
    return data * std_reshaped + mean_reshaped
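# Example of the broadcasting: data of shape [batch, 12, nodes] is multiplied element-wise
# by std of shape [1, 1, nodes], so each OD pair is rescaled by its own standard deviation
# and shifted by its own mean, reversing the StandardScaler fitted in main().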
# Evaluation function
def evaluate_model(model, test_loader, edge_index, scaler, config, device):
    model.eval()
    predictions = []
    actuals = []
    edge_index = edge_index.to(device)
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            batch_size, window_size, num_nodes = data.shape
            data = data.permute(0, 2, 1).reshape(-1, window_size)
            batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)
            output = model(data, edge_index, batch_indices)  # [batch, pred, nodes]
            predictions.append(output.cpu().numpy())
            actuals.append(target.cpu().numpy())
    # Concatenate all batches
    predictions = np.concatenate(predictions, axis=0)
    actuals = np.concatenate(actuals, axis=0)
    # Undo the standardization
    mean = scaler.mean_
    std = scaler.scale_
    predictions = inverse_transform(predictions, mean, std)
    actuals = inverse_transform(actuals, mean, std)
    # Compute evaluation metrics
    mse = mean_squared_error(actuals.flatten(), predictions.flatten())
    mae = mean_absolute_error(actuals.flatten(), predictions.flatten())
    rmse = np.sqrt(mse)
    r2 = r2_score(actuals.flatten(), predictions.flatten())
    print('\nTest set metrics:')
    print(f'MSE: {mse:.6f}')
    print(f'MAE: {mae:.6f}')
    print(f'RMSE: {rmse:.6f}')
    print(f'R²: {r2:.6f}')
    return predictions, actuals, {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}
# Plot the loss curves
def plot_loss_curve(train_losses, val_losses):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training loss')
    plt.plot(val_losses, label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE loss')
    plt.legend()
    plt.grid(True)
    plt.savefig('loss_curve.png', dpi=300)
    plt.show()

# Plot the predictions
def plot_predictions(predictions, actuals, node_indices=None, num_samples=3):
    """
    Plot predictions against the ground truth.
    predictions: predictions of shape [samples, pred, nodes]
    actuals: ground truth of shape [samples, pred, nodes]
    node_indices: list of node indices to plot
    num_samples: number of samples to plot
    """
    # Default to the first three nodes if none are specified
    if node_indices is None:
        node_indices = [0, 1, 2]
    # Create the output directory
    os.makedirs('prediction_plots', exist_ok=True)
    # Plot each node's predictions
    for node_idx in node_indices:
        plt.figure(figsize=(12, 6))
        # Plot several samples
        for i in range(min(num_samples, len(predictions))):
            time_steps = range(actuals[i].shape[0])
            plt.subplot(min(num_samples, len(predictions)), 1, i + 1)
            plt.plot(time_steps, actuals[i, :, node_idx], label='Actual', marker='o', markersize=4)
            plt.plot(time_steps, predictions[i, :, node_idx], label='Predicted', marker='x', markersize=4)
            plt.title(f'Sample {i + 1} - predictions for OD pair {node_idx + 1}')
            plt.xlabel('Time step')
            plt.ylabel('Flow')
            plt.legend()
            plt.grid(True)
        plt.tight_layout()
        plt.savefig(f'prediction_plots/node_{node_idx + 1}_predictions.png', dpi=300)
        plt.show()
# Main function
def main():
    # Initialize the configuration
    config = Config()
    # Load the data
    print("Loading data...")
    df = pd.read_csv('Abilene-OD_pair.csv', nrows=2000)
    print(f"Data loaded: {len(df)} rows, {len(df.columns)} columns")
    # Extract the time series (skip the time column)
    time_stamps = df['time'].values
    traffic_data = df.iloc[:, 1:].values  # shape [time_steps, num_nodes]
    config.num_nodes = traffic_data.shape[1]  # set the number of nodes
    print(f"Traffic data shape: {traffic_data.shape}")
    # Standardize the data
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(traffic_data)
    # Split the dataset
    n = len(scaled_data)
    train_size = int(n * config.train_ratio)
    val_size = int(n * config.val_ratio)
    train_data = scaled_data[:train_size]
    val_data = scaled_data[train_size:train_size + val_size]
    test_data = scaled_data[train_size + val_size:]
    print(f"Split: {len(train_data)} training, {len(val_data)} validation, {len(test_data)} test rows")
    # Create the data loaders
    train_dataset = TrafficDataset(train_data, config.window_size, config.pred_size)
    val_dataset = TrafficDataset(val_data, config.window_size, config.pred_size)
    test_dataset = TrafficDataset(test_data, config.window_size, config.pred_size)
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)
    print(f"Data loaders: {len(train_loader)} training, {len(val_loader)} validation, {len(test_loader)} test batches")
    # Build the graph structure (from the training data)
    print("Building the graph...")
    # Use part of the training data to build the graph, for efficiency
    sample_data = train_data[:1000]  # first 1000 time steps
    edge_index = build_od_graph(sample_data)
    # Initialize the model, optimizer and loss function
    model = GNNTransformer(config).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = nn.MSELoss()
    print(f"Number of model parameters: {sum(p.numel() for p in model.parameters()):,}")
    # Train the model
    print("Training the model...")
    model, train_losses, val_losses = train_model(
        model, train_loader, val_loader, edge_index,
        optimizer, criterion, config, device
    )
    # Plot the loss curves
    plot_loss_curve(train_losses, val_losses)
    # Evaluate the model
    print("Evaluating the model...")
    predictions, actuals, metrics = evaluate_model(
        model, test_loader, edge_index, scaler, config, device
    )
    # Plot predictions for a few representative OD pairs
    # (adjust node_indices as needed)
    plot_predictions(predictions, actuals, node_indices=[0, 10, 50], num_samples=2)
    # Save the final model
    torch.save(model.state_dict(), 'final_model.pth')
    print("Model saved as 'final_model.pth'")
    # Save the evaluation results
    with open('evaluation_results.txt', 'w', encoding='utf-8') as f:
        f.write("Evaluation metrics:\n")
        for key, value in metrics.items():
            f.write(f"{key}: {value:.6f}\n")
    print("Evaluation results saved as 'evaluation_results.txt'")

if __name__ == "__main__":
    main()
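If the trained weights need to be reused later without rerunning training, the checkpoint written by train_model can be reloaded as sketched below; the node count of 144 is an assumption and must match the number of OD columns actually used during training.

# Minimal reload sketch; 144 is an assumed node count, adjust to match the training data.
config = Config()
config.num_nodes = 144
model = GNNTransformer(config).to(device)
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()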
Full code:
https://download.youkuaiyun.com/download/qq_38735017/91763492
GNN-Transformer time-series forecasting, complete code