GNN-Transformer Time Series Forecasting, Data and Code: Complete and Ready to Run

Complete GNN-Transformer time series forecasting code

Run screenshot: (figure omitted)

Data preview: (figure omitted)

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch_geometric.nn import GATv2Conv
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

# Font setup (SimHei renders CJK glyphs; optional now that plot labels are in English)
plt.rcParams["font.family"] = ["SimHei"]
warnings.filterwarnings('ignore')


# Set random seeds for reproducibility
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed()

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


# Configuration
class Config:
    def __init__(self):
        self.window_size = 24  # input window length (history)
        self.pred_size = 12  # prediction horizon (future steps)
        self.batch_size = 128  # batch size
        self.epochs = 20  # number of training epochs
        self.learning_rate = 0.01  # learning rate
        self.hidden_dim = 64  # hidden dimension
        self.num_heads = 4  # number of attention heads
        self.num_layers = 2  # number of GNN layers
        self.dropout = 0.2  # dropout rate
        self.train_ratio = 0.7  # training split
        self.val_ratio = 0.15  # validation split
        self.test_ratio = 0.15  # test split



# Data loading and preprocessing
class TrafficDataset(Dataset):
    def __init__(self, data, window_size, pred_size):
        """
        Traffic flow dataset.
        data: standardized flow data of shape [time_steps, num_nodes]
        window_size: input window length
        pred_size: prediction horizon
        """
        self.data = data
        self.window_size = window_size
        self.pred_size = pred_size

    def __len__(self):
        return len(self.data) - self.window_size - self.pred_size + 1

    def __getitem__(self, idx):
        # Input: [window_size, num_nodes]
        x = self.data[idx:idx + self.window_size]
        # Target: [pred_size, num_nodes]
        y = self.data[idx + self.window_size:idx + self.window_size + self.pred_size]
        return torch.FloatTensor(x), torch.FloatTensor(y)
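
# Shape check (worked example with illustrative numbers): with window_size=24,
# pred_size=12 and a 100-step series over 5 nodes, the dataset yields
# 100 - 24 - 12 + 1 = 65 samples, each with x of shape [24, 5] and y of shape [12, 5].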


# Build the graph structure (from correlations between OD pairs)
def build_od_graph(data, threshold=0.6):
    """
    Build the graph from correlations between OD pairs.
    data: flow data of shape [time_steps, num_nodes]
    threshold: correlation threshold
    """
    # Node-to-node correlation (NaNs from constant columns are zeroed out)
    corr_matrix = np.nan_to_num(np.corrcoef(data.T))  # [num_nodes, num_nodes]
    edge_index = []

    # Add an edge wherever |correlation| exceeds the threshold
    for i in range(corr_matrix.shape[0]):
        for j in range(i + 1, corr_matrix.shape[1]):
            if abs(corr_matrix[i, j]) > threshold:
                edge_index.append([i, j])
                edge_index.append([j, i])  # undirected graph

    # If there are too few edges, fall back to a minimum spanning tree
    # to keep the graph connected
    if len(edge_index) < corr_matrix.shape[0]:
        print(f"Correlation threshold {threshold} is too high, not enough edges; building a minimum spanning tree...")
        from scipy.sparse.csgraph import minimum_spanning_tree
        from scipy.sparse import csr_matrix

        # Convert correlation to a distance
        dist_matrix = 1 - np.abs(corr_matrix)
        mst = minimum_spanning_tree(csr_matrix(dist_matrix))
        mst = mst.toarray()

        for i in range(mst.shape[0]):
            for j in range(mst.shape[1]):
                if mst[i, j] != 0:
                    edge_index.append([i, j])
                    edge_index.append([j, i])

    # Convert to a PyTorch tensor and transpose to [2, num_edges]
    edge_index = torch.LongTensor(edge_index).t().contiguous()
    print(f"Graph built with {edge_index.shape[1] // 2} edges")
    return edge_index
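
# Illustrative usage (commented out; synthetic data, not part of the pipeline):
# two strongly coupled random series should end up connected.
#   demo = np.random.randn(300, 4)
#   demo[:, 1] = demo[:, 0] + 0.05 * np.random.randn(300)
#   build_od_graph(demo)  # expect [0, 1] and [1, 0] among the returned edges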


# GNN-Transformer model
class GNNTransformer(nn.Module):
    def __init__(self, config):
        super(GNNTransformer, self).__init__()
        self.config = config

        # GNN layers
        self.gnn_layers = nn.ModuleList()
        # Input layer: window_size -> hidden_dim * num_heads (heads are concatenated)
        self.gnn_layers.append(
            GATv2Conv(config.window_size, config.hidden_dim,
                      heads=config.num_heads, dropout=config.dropout)
        )

        # Subsequent GNN layers
        for _ in range(config.num_layers - 1):
            self.gnn_layers.append(
                GATv2Conv(config.hidden_dim * config.num_heads, config.hidden_dim,
                          heads=config.num_heads, dropout=config.dropout)
            )

        # Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_dim * config.num_heads,
            nhead=config.num_heads,
            dim_feedforward=config.hidden_dim * 4,
            dropout=config.dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
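
        # Note: with batch_first=True and input [batch, num_nodes, features],
        # self-attention here mixes information across the node axis (nodes act
        # as the "sequence"), complementing the local GNN message passing.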

        # Output layer: map the hidden dimension to the prediction horizon
        self.output_layer = nn.Sequential(
            nn.Linear(config.hidden_dim * config.num_heads, config.hidden_dim),
            nn.ReLU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.hidden_dim, config.pred_size)
        )

    def forward(self, x, edge_index, batch=None):
        """
        x: input features of shape [batch_size * num_nodes, window_size]
        edge_index: edge indices of shape [2, num_edges] (a single graph)
        batch: batch indices of shape [batch_size * num_nodes]
        """
        batch_size = len(torch.unique(batch)) if batch is not None else 1
        num_nodes = self.config.num_nodes

        # edge_index describes one graph, but x stacks batch_size copies of the
        # node set, so replicate the edges with a per-sample offset (PyG's
        # block-diagonal batching convention); otherwise only the first
        # sample's nodes would receive any messages.
        if batch_size > 1:
            edge_index = torch.cat(
                [edge_index + i * num_nodes for i in range(batch_size)], dim=1)

        # GNN message passing
        for i, gnn_layer in enumerate(self.gnn_layers):
            x = gnn_layer(x, edge_index)
            if i < len(self.gnn_layers) - 1:
                x = F.elu(x)
                x = F.dropout(x, p=self.config.dropout, training=self.training)

        # Reshape to [batch_size, num_nodes, hidden_dim * num_heads]
        x = x.view(batch_size, num_nodes, -1)

        # Transformer over the node dimension
        x = self.transformer_encoder(x)

        # Output layer: predict the next pred_size time steps
        x = self.output_layer(x)  # [batch_size, num_nodes, pred_size]

        # Transpose to [batch_size, pred_size, num_nodes] to match the targets
        return x.permute(0, 2, 1)


# Training function
def train_model(model, train_loader, val_loader, edge_index, optimizer, criterion, config, device):
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_model = None

    edge_index = edge_index.to(device)

    for epoch in range(config.epochs):
        # Training phase
        model.train()
        train_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)  # [batch, window, nodes]

            # Reshape the data for the GNN input
            batch_size, window_size, num_nodes = data.shape
            data = data.permute(0, 2, 1).reshape(-1, window_size)  # [batch*nodes, window]

            # Build batch indices
            batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)

            optimizer.zero_grad()
            output = model(data, edge_index, batch_indices)  # [batch, pred, nodes]

            loss = criterion(output, target)
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()

            # Log every 10 batches
            if (batch_idx + 1) % 10 == 0:
                print(
                    f'Epoch {epoch + 1}/{config.epochs}, Batch {batch_idx + 1}/{len(train_loader)}, Loss: {loss.item():.6f}')

        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)

                batch_size, window_size, num_nodes = data.shape
                data = data.permute(0, 2, 1).reshape(-1, window_size)
                batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)

                output = model(data, edge_index, batch_indices)
                loss = criterion(output, target)
                val_loss += loss.item()

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        # Save the best model (clone the tensors: state_dict().copy() is a
        # shallow copy whose values still reference the live, still-training weights)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model = {k: v.detach().clone() for k, v in model.state_dict().items()}
            torch.save(best_model, 'best_model.pth')

        print(f'Epoch {epoch + 1}/{config.epochs}, train loss: {avg_train_loss:.6f}, val loss: {avg_val_loss:.6f}')

    # Restore the best model weights
    model.load_state_dict(best_model)
    return model, train_losses, val_losses


# Inverse standardization
def inverse_transform(data, mean, std):
    """
    Undo standardization.
    data: standardized data of shape [batch, pred, nodes]
    mean: per-node means, shape [nodes]
    std: per-node standard deviations, shape [nodes]
    """
    # Reshape mean and std to [1, 1, nodes] so they broadcast against data
    mean_reshaped = mean.reshape(1, 1, -1)
    std_reshaped = std.reshape(1, 1, -1)
    return data * std_reshaped + mean_reshaped
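
# Worked example (illustrative numbers): for a node with mean 120 and std 30,
# a standardized value of 0.5 maps back to 0.5 * 30 + 120 = 135.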


# Evaluation function
def evaluate_model(model, test_loader, edge_index, scaler, config, device):
    model.eval()
    predictions = []
    actuals = []

    edge_index = edge_index.to(device)

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            batch_size, window_size, num_nodes = data.shape
            data = data.permute(0, 2, 1).reshape(-1, window_size)
            batch_indices = torch.arange(batch_size, device=device).repeat_interleave(num_nodes)

            output = model(data, edge_index, batch_indices)  # [batch, pred, nodes]

            predictions.append(output.cpu().numpy())
            actuals.append(target.cpu().numpy())

    # Concatenate all batches
    predictions = np.concatenate(predictions, axis=0)
    actuals = np.concatenate(actuals, axis=0)

    # Inverse standardization
    mean = scaler.mean_
    std = scaler.scale_
    predictions = inverse_transform(predictions, mean, std)
    actuals = inverse_transform(actuals, mean, std)

    # Compute evaluation metrics
    mse = mean_squared_error(actuals.flatten(), predictions.flatten())
    mae = mean_absolute_error(actuals.flatten(), predictions.flatten())
    rmse = np.sqrt(mse)
    r2 = r2_score(actuals.flatten(), predictions.flatten())

    print(f'\nTest set metrics:')
    print(f'MSE: {mse:.6f}')
    print(f'MAE: {mae:.6f}')
    print(f'RMSE: {rmse:.6f}')
    print(f'R²: {r2:.6f}')

    return predictions, actuals, {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}


# Plot the loss curves
def plot_loss_curve(train_losses, val_losses):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Train loss')
    plt.plot(val_losses, label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('MSE loss')
    plt.legend()
    plt.grid(True)
    plt.savefig('loss_curve.png', dpi=300)
    plt.show()


# Plot predictions
def plot_predictions(predictions, actuals, node_indices=None, num_samples=3):
    """
    Plot predicted versus actual values.
    predictions: predictions of shape [samples, pred, nodes]
    actuals: ground truth of shape [samples, pred, nodes]
    node_indices: list of node indices to plot
    num_samples: number of samples to plot
    """
    # Default to the first three nodes if none are given
    if node_indices is None:
        node_indices = [0, 1, 2]

    # Create the output directory
    os.makedirs('prediction_plots', exist_ok=True)

    # Plot each selected node
    for node_idx in node_indices:
        plt.figure(figsize=(12, 6))

        # Plot several samples
        for i in range(min(num_samples, len(predictions))):
            time_steps = range(actuals[i].shape[0])
            plt.subplot(min(num_samples, len(predictions)), 1, i + 1)
            plt.plot(time_steps, actuals[i, :, node_idx], label='Actual', marker='o', markersize=4)
            plt.plot(time_steps, predictions[i, :, node_idx], label='Predicted', marker='x', markersize=4)
            plt.title(f'Sample {i + 1} - predictions for OD pair {node_idx + 1}')
            plt.xlabel('Time step')
            plt.ylabel('Flow')
            plt.legend()
            plt.grid(True)

        plt.tight_layout()
        plt.savefig(f'prediction_plots/node_{node_idx + 1}_predictions.png', dpi=300)
        plt.show()


# Main function
def main():
    # Initialize the configuration
    config = Config()

    # Load the data
    print("Loading data...")
    df = pd.read_csv('Abilene-OD_pair.csv', nrows=2000)
    print(f"Data loaded: {len(df)} rows, {len(df.columns)} columns")

    # Extract the time series (skip the time column)
    time_stamps = df['time'].values
    traffic_data = df.iloc[:, 1:].values  # shape [time_steps, num_nodes]
    config.num_nodes = traffic_data.shape[1]  # set the number of nodes
    print(f"Traffic data shape: {traffic_data.shape}")

    # Standardize the data
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(traffic_data)

    # Split the dataset
    n = len(scaled_data)
    train_size = int(n * config.train_ratio)
    val_size = int(n * config.val_ratio)

    train_data = scaled_data[:train_size]
    val_data = scaled_data[train_size:train_size + val_size]
    test_data = scaled_data[train_size + val_size:]

    print(f"Split: {len(train_data)} train, {len(val_data)} val, {len(test_data)} test rows")

    # Create the data loaders
    train_dataset = TrafficDataset(train_data, config.window_size, config.pred_size)
    val_dataset = TrafficDataset(val_data, config.window_size, config.pred_size)
    test_dataset = TrafficDataset(test_data, config.window_size, config.pred_size)

    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

    print(f"Loaders: {len(train_loader)} train, {len(val_loader)} val, {len(test_loader)} test batches")

    # Build the graph structure (from training data)
    print("Building graph structure...")
    # Use a slice of the training data to build the graph, for efficiency
    sample_data = train_data[:1000]  # first 1000 time steps
    edge_index = build_od_graph(sample_data)

    # Initialize the model, optimizer, and loss function
    model = GNNTransformer(config).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = nn.MSELoss()

    print(f"Number of model parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Train the model
    print("Training...")
    model, train_losses, val_losses = train_model(
        model, train_loader, val_loader, edge_index,
        optimizer, criterion, config, device
    )

    # Plot the loss curves
    plot_loss_curve(train_losses, val_losses)

    # Evaluate the model
    print("Evaluating...")
    predictions, actuals, metrics = evaluate_model(
        model, test_loader, edge_index, scaler, config, device
    )

    # Plot predictions for a few representative OD pairs
    # (adjust the node indices as needed; they must be < config.num_nodes)
    plot_predictions(predictions, actuals, node_indices=[0, 10, 50], num_samples=2)

    # Save the final model
    torch.save(model.state_dict(), 'final_model.pth')
    print("Model saved as 'final_model.pth'")

    # Save the evaluation results
    with open('evaluation_results.txt', 'w', encoding='utf-8') as f:
        f.write("Evaluation metrics:\n")
        for key, value in metrics.items():
            f.write(f"{key}: {value:.6f}\n")
    print("Evaluation results saved as 'evaluation_results.txt'")


if __name__ == "__main__":
    main()
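
If you don't have Abilene-OD_pair.csv at hand, the script can still be run end to end with a synthetic stand-in. The sketch below assumes only the layout main() expects (a 'time' column followed by one numeric column per OD pair); the sizes, column names, and value ranges are illustrative, not the real Abilene schema:

import numpy as np
import pandas as pd

# Generate a CSV with the layout main() expects: a 'time' column plus one
# numeric column per OD pair (main() selects columns by position, so the
# names are arbitrary).
rng = np.random.default_rng(0)
steps, num_pairs = 2000, 64  # illustrative sizes
t = pd.date_range('2004-03-01', periods=steps, freq='5min')
cycle = np.sin(np.linspace(0, 40 * np.pi, steps))[:, None]  # shared periodic component
flows = 100 + 50 * cycle + rng.normal(0, 10, size=(steps, num_pairs))
df = pd.DataFrame(flows, columns=[f'od_{i}' for i in range(num_pairs)])
df.insert(0, 'time', t)
df.to_csv('Abilene-OD_pair.csv', index=False)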

Full code:

https://download.youkuaiyun.com/download/qq_38735017/91763492
