图神经网络_task03

经典图神经网络学习

本文主要参考DataWhale图神经网络组队学习
首先本文主要是利用图网络模型进行节点级别的任务,如节点分类。
在这里插入图片描述

Cora数据集介绍

Cora是一个论文引用网络,节点代表论文,共有2708篇论文;如果两篇论文存在引用关系,则对应的两个节点之间存在边。各节点的属性都是一个1433维的词袋特征向量。

from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
# Load the Cora dataset with the Planetoid loader, using dataset/Cora as its root
# directory and NormalizeFeatures as the transform.
dataset = Planetoid(root='dataset/Cora', name='Cora', 
                    transform=NormalizeFeatures())

查看特征矩阵

# Take the first (index 0) graph object from the dataset.
data = dataset[0]
# Notebook-style bare expression: shows the shape of the node feature matrix.
data.x.shape

在这里插入图片描述
查看数据集标签

# Notebook-style bare expression: shows the label tensor (shape noted by the author on the right).
data.y    # shape ---> torch.Size([2708])

在这里插入图片描述

构建MLP对Cora数据集进行节点分类

我们的MLP由两个线性层、一个ReLU非线性层和一个dropout操作组成。第一个线性层将1433维的节点表征嵌入(embedding)到低维空间中(hidden_channels=16),第二个线性层将节点表征嵌入到类别空间中(num_classes=7)。

import torch
from torch.nn import Linear
import torch.nn.functional as F

class MLP(torch.nn.Module):
    """Two-layer perceptron that classifies nodes from their own features only.

    Layer sizes come from the module-level ``dataset``: the input width is
    ``dataset.num_features`` and the output width is ``dataset.num_classes``.
    """

    def __init__(self, hidden_channels):
        """Create both linear layers; ``hidden_channels`` is the hidden width."""
        super().__init__()
        in_dim, out_dim = dataset.num_features, dataset.num_classes
        self.lin1 = Linear(in_dim, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_dim)

    def forward(self, x):
        """Return log-softmax scores (taken over dim 1) for the input ``x``."""
        hidden = self.lin1(x).relu()
        # Dropout with p=0.5 is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        logits = self.lin2(hidden)
        return logits.log_softmax(dim=1)

# Build the MLP with 16 hidden units once and print its layer layout, so the
# printed instance is the same one that gets trained.
model = MLP(hidden_channels=16)
print(model)
# Adam over the model's parameters with lr=0.01 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

开始训练,此处是半监督学习,因为只用到了140个训练标签进行训练

data.train_mask.sum().item()  #  140


def train():
    """Run one full-batch optimisation step of the MLP and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    log_probs = model(data.x)
    mask = data.train_mask
    # Only the nodes selected by train_mask contribute to the loss.
    loss = F.nll_loss(log_probs[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..200, reporting the loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')

开始测试分类准确率:

def test():
    """Return the fraction of test-mask nodes whose arg-max prediction equals the label."""
    model.eval()
    predictions = model(data.x).argmax(dim=1)
    mask = data.test_mask
    hits = predictions[mask] == data.y[mask]
    # Both counts are converted to Python ints, so the result is a plain float.
    return int(hits.sum()) / int(mask.sum())


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

构建GCN用于Cora数据集节点分类

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two stacked GCNConv layers producing per-node log-probabilities.

    Input and output widths are read from the module-level ``dataset``
    (``num_features`` and ``num_classes``).
    """

    def __init__(self, hidden_channels):
        """Create the two graph convolutions; ``hidden_channels`` is the hidden width."""
        super().__init__()
        self.conv_1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv_2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        """Apply conv -> ReLU -> dropout -> conv and return log-softmax over dim 1."""
        hidden = self.conv_1(x, edge_index).relu()
        # Dropout with p=0.5 is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        logits = self.conv_2(hidden, edge_index)
        return logits.log_softmax(dim=1)

开始训练

# Instantiate the two-layer GCN with 16 hidden channels.
model = GCN(hidden_channels=16)
# Adam over the GCN's parameters with lr=0.01 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train():
    """Perform one full-batch training step of the GCN and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    log_probs = model(data.x, data.edge_index)
    train_nodes = data.train_mask
    # The loss is computed on the training nodes only.
    loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..200, reporting the loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')

开始测试:

def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

构建GAT用于Cora数据集节点分类

import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv

class GAT(torch.nn.Module):
    """Two GATConv layers producing per-node log-probabilities.

    The first layer uses 8 heads with ``hidden_channels`` outputs each; the
    second takes ``8 * hidden_channels`` inputs and uses a single head with
    ``concat=False``. Both layers are built with ``dropout=0.6``.
    """

    def __init__(self, hidden_channels):
        """Create both attention layers; sizes come from the module-level ``dataset``."""
        super().__init__()
        heads = 8
        self.gatconv1 = GATConv(dataset.num_features, hidden_channels, heads=heads, dropout=0.6)
        self.gatconv2 = GATConv(heads * hidden_channels, dataset.num_classes, heads=1, concat=False, dropout=0.6)

    def forward(self, x, edge_index):
        """Apply dropout before each attention layer and return log-softmax over dim 1."""
        # Feature dropout (p=0.6) is only active in training mode.
        dropped = F.dropout(x, p=0.6, training=self.training)
        hidden = self.gatconv1(dropped, edge_index)
        hidden = F.dropout(hidden, p=0.6, training=self.training)
        logits = self.gatconv2(hidden, edge_index)
        return logits.log_softmax(dim=1)

开始训练

# Instantiate the GAT with hidden_channels=8.
model = GAT(hidden_channels=8)
# Adam over the GAT's parameters with lr=0.005 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(),lr=0.005, weight_decay=5e-4)

def train():
    """Perform one full-batch training step of the GAT and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    scores = model(data.x, data.edge_index)
    selected = data.train_mask
    # Negative log-likelihood over the training nodes only.
    loss = F.nll_loss(scores[selected], data.y[selected])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..800, reporting the loss after every step.
for epoch in range(1, 801):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')

开始测试:

def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

可视化

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Project ``h`` to 2-D with t-SNE and show a scatter plot coloured by ``color``.

    Args:
        h: tensor of per-node outputs; it is detached and moved to the CPU
            before being passed to t-SNE.
        color: per-point colour values, either a torch tensor (on any device)
            or any value matplotlib accepts for ``c``.
    """
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())
    # Tensors are converted to NumPy on the CPU first so that colours coming
    # from a CUDA tensor can be plotted as well.
    if torch.is_tensor(color):
        color = color.detach().cpu().numpy()
    plt.figure(figsize=(10, 10))
    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color)
    plt.show()

将训练好的GAT模型学习到的节点表征进行可视化(仅对测试节点进行可视化)

# Put the model in eval mode explicitly (so dropout is off regardless of which
# cells ran before) and skip gradient tracking for this inference-only pass.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
# Plot only the test nodes, coloured by their labels.
visualize(out[data.test_mask], data.y[data.test_mask])

在这里插入图片描述

作业

使用PyG中不同的图卷积模块在PyG的不同数据集上实现节点分类或回归任务。

获取CiteSeer数据集

from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
# Load the CiteSeer dataset with the Planetoid loader, using dataset/CiteSeer as its
# root directory and NormalizeFeatures as the transform.
dataset = Planetoid(root='dataset/CiteSeer', name='CiteSeer', transform=NormalizeFeatures())
# Take the first (index 0) graph object from the dataset.
data = dataset[0]
# Notebook-style bare expression: shows the shape of the node feature matrix.
data.x.shape

在这里插入图片描述

构建GCN对CiteSeer数据集进行节点分类

import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two stacked GCNConv layers producing per-node log-probabilities.

    Input and output widths are read from the module-level ``dataset``
    (``num_node_features`` and ``num_classes``).
    """

    def __init__(self, hidden_channels):
        """Create the two graph convolutions; ``hidden_channels`` is the hidden width."""
        super().__init__()
        n_in, n_out = dataset.num_node_features, dataset.num_classes
        self.conv1 = GCNConv(n_in, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, n_out)

    def forward(self, x, edge_index):
        """Apply conv -> ReLU -> dropout -> conv and return log-softmax over dim 1."""
        activated = self.conv1(x, edge_index).relu()
        # Dropout with p=0.5 is only active while the module is in training mode.
        activated = F.dropout(activated, p=0.5, training=self.training)
        return self.conv2(activated, edge_index).log_softmax(dim=1)
# Instantiate the GCN (sized for the currently loaded dataset) with 16 hidden channels.
model = GCN(hidden_channels=16)
# Adam over the GCN's parameters with lr=0.01 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(),lr=0.01,weight_decay=5e-4)
def train():
    """Perform one full-batch training step and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    log_probs = model(data.x, data.edge_index)
    mask = data.train_mask
    # The loss is computed on the training nodes only.
    loss = F.nll_loss(log_probs[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..200, reporting the loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')
def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


# test() now returns a Python float, so no .item() call is needed here.
print(f'Test Accuracy: {test():.4f}')

在这里插入图片描述

可视化

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Project ``h`` to 2-D with t-SNE and show a scatter plot coloured by ``color``.

    Args:
        h: tensor of per-node outputs; it is detached and moved to the CPU
            before being passed to t-SNE.
        color: per-point colour values, either a torch tensor (on any device)
            or any value matplotlib accepts for ``c``.
    """
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())
    # Tensors are converted to NumPy on the CPU first so that colours coming
    # from a CUDA tensor can be plotted as well.
    if torch.is_tensor(color):
        color = color.detach().cpu().numpy()
    plt.figure(figsize=(10, 10))
    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color)
    plt.show()


# Put the model in eval mode explicitly (so dropout is off regardless of which
# cells ran before) and skip gradient tracking for this inference-only pass.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
# Plot only the test nodes, coloured by their labels.
visualize(out[data.test_mask], data.y[data.test_mask])

在这里插入图片描述

构建GAT对CiteSeer数据集进行节点分类(利用GPU加速)

import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GATConv

class GAT(torch.nn.Module):
    """Two GATConv layers producing per-node log-probabilities.

    The first layer uses 8 heads with ``hidden_channels`` outputs each; the
    second takes ``8 * hidden_channels`` inputs and uses a single head with
    ``concat=False``. Both layers are built with ``dropout=0.6``.
    """

    def __init__(self, hidden_channels):
        """Create both attention layers; sizes come from the module-level ``dataset``."""
        super().__init__()
        num_heads = 8
        self.gatconv1 = GATConv(dataset.num_features, hidden_channels, heads=num_heads, dropout=0.6)
        self.gatconv2 = GATConv(num_heads * hidden_channels, dataset.num_classes, heads=1, concat=False, dropout=0.6)

    def forward(self, x, edge_index):
        """Apply dropout before each attention layer and return log-softmax over dim 1."""
        # Feature dropout (p=0.6) is only active in training mode.
        features = F.dropout(x, p=0.6, training=self.training)
        features = self.gatconv1(features, edge_index)
        features = F.dropout(features, p=0.6, training=self.training)
        return self.gatconv2(features, edge_index).log_softmax(dim=1)
# Use the GPU only when CUDA is really available, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move both the model and the graph data to the chosen device.
model = GAT(hidden_channels=8).to(device)
data = dataset[0].to(device)
# Adam over the GAT's parameters with lr=0.005 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

开始训练:

def train():
    """Perform one full-batch training step of the GAT and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    scores = model(data.x, data.edge_index)
    selected = data.train_mask
    # Negative log-likelihood over the training nodes only.
    loss = F.nll_loss(scores[selected], data.y[selected])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..800, reporting the loss after every step.
for epoch in range(1, 801):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')
def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

获取PubMed数据集

from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
# Load the PubMed dataset with the Planetoid loader. The root is the same kind of
# relative dataset/<name> directory used for Cora and CiteSeer, so the script
# does not depend on a machine-specific absolute Windows path.
dataset = Planetoid(root='dataset/PubMed', name='PubMed',
                    transform=NormalizeFeatures())

查看特征矩阵:

# Take the first (index 0) graph object from the dataset.
data = dataset[0]
# Notebook-style bare expression: shows the shape of the node feature matrix.
data.x.shape

在这里插入图片描述

构建GCN对PubMed数据集进行节点分类

import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two stacked GCNConv layers producing per-node log-probabilities.

    Input and output widths are read from the module-level ``dataset``
    (``num_node_features`` and ``num_classes``).
    """

    def __init__(self, hidden_channels):
        """Create the two graph convolutions; ``hidden_channels`` is the hidden width."""
        super().__init__()
        feature_dim, class_count = dataset.num_node_features, dataset.num_classes
        self.conv1 = GCNConv(feature_dim, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, class_count)

    def forward(self, x, edge_index):
        """Apply conv -> ReLU -> dropout -> conv and return log-softmax over dim 1."""
        h = self.conv1(x, edge_index).relu()
        # Dropout with p=0.5 is only active while the module is in training mode.
        h = F.dropout(h, p=0.5, training=self.training)
        out = self.conv2(h, edge_index)
        return out.log_softmax(dim=1)

开始训练

# Use the GPU only when CUDA is really available, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move both the model and the graph data to the chosen device.
model = GCN(hidden_channels=16).to(device)
data = dataset[0].to(device)
# Adam over the GCN's parameters with lr=0.01 and weight_decay=5e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train():
    """Perform one full-batch training step of the GCN and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    log_probs = model(data.x, data.edge_index)
    train_nodes = data.train_mask
    # The loss is computed on the training nodes only.
    loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..200, reporting the loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')
def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

可视化

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Project ``h`` to 2-D with t-SNE and show a scatter plot coloured by ``color``.

    Args:
        h: tensor of per-node outputs; it is detached and moved to the CPU
            before being passed to t-SNE.
        color: per-point colour values, either a torch tensor (on any device)
            or any value matplotlib accepts for ``c``.
    """
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())
    # Only tensors need the CPU/NumPy conversion; other colour inputs are
    # passed through unchanged instead of failing on a missing .cpu().
    if torch.is_tensor(color):
        color = color.detach().cpu().numpy()
    plt.figure(figsize=(10, 10))
    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color)
    plt.show()


# Put the model in eval mode explicitly (so dropout is off regardless of which
# cells ran before) and skip gradient tracking for this inference-only pass.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
# Plot only the test nodes, coloured by their labels.
visualize(out[data.test_mask], data.y[data.test_mask])

在这里插入图片描述

构建GAT对PubMed数据集进行节点分类

import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GATConv

class GAT(torch.nn.Module):
    """Two GATConv layers producing per-node log-probabilities.

    The first layer uses 8 heads with ``hidden_channels`` outputs each; the
    second takes ``8 * hidden_channels`` inputs and uses a single head with
    ``concat=False``. Both layers are built with ``dropout=0.6``.
    """

    def __init__(self, hidden_channels):
        """Create both attention layers; sizes come from the module-level ``dataset``."""
        super().__init__()
        n_heads = 8
        self.gatconv1 = GATConv(dataset.num_features, hidden_channels, heads=n_heads, dropout=0.6)
        self.gatconv2 = GATConv(n_heads * hidden_channels, dataset.num_classes, heads=1, concat=False, dropout=0.6)

    def forward(self, x, edge_index):
        """Apply dropout before each attention layer and return log-softmax over dim 1."""
        # Feature dropout (p=0.6) is only active in training mode.
        h = F.dropout(x, p=0.6, training=self.training)
        h = self.gatconv1(h, edge_index)
        h = F.dropout(h, p=0.6, training=self.training)
        logits = self.gatconv2(h, edge_index)
        return logits.log_softmax(dim=1)

开始训练:

# Use the GPU only when CUDA is really available, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move both the model and the graph data to the chosen device.
model = GAT(hidden_channels=8).to(device)
data = dataset[0].to(device)
# Adam over the GAT's parameters with lr=0.01 and weight_decay=0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)

def train():
    """Perform one full-batch training step of the GAT and return the loss tensor."""
    model.train()
    optimizer.zero_grad()
    scores = model(data.x, data.edge_index)
    selected = data.train_mask
    # Negative log-likelihood over the training nodes only.
    loss = F.nll_loss(scores[selected], data.y[selected])
    loss.backward()
    optimizer.step()
    return loss


# Train for epochs 1..200, reporting the loss after every step.
for epoch in range(1, 201):
    loss = train()
    print(f'Epoch:{epoch:03d}, Loss:{loss:.4f}')

开始测试:

def test():
    """Evaluate the module-level ``model`` on the test split of ``data``.

    Returns:
        float: fraction of nodes selected by ``data.test_mask`` whose arg-max
        prediction equals the label in ``data.y``.
    """
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Convert both counts to Python ints so the ratio is a plain float instead of
    # the result of dividing an int by an integer tensor.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy:{test_acc:.4f}')

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值