OpenNE框架:OpenNE 是一个用于图嵌入(graph embedding)的框架,它集成了多个常用的图嵌入算法,例如 DeepWalk、LINE、Node2vec、HOPE 等。OpenNE 的目标是提供统一的接口和框架,帮助研究人员和开发者快速实现和测试各种图嵌入算法。通过 OpenNE,你可以在各种图结构数据上生成节点的嵌入向量,这些向量可以用在下游的任务中,如节点分类、链接预测和社区检测等。
通过OpenNE,可以对图数据生成嵌入并进行后续分析。
PyTorch:
- 是一个流行的深度学习框架,用于构建和训练神经网络模型。它提供了张量运算的支持,并集成了自动求导、GPU加速等功能。
- PyTorch 被广泛用于各种机器学习任务,包括图像识别、自然语言处理、时间序列预测、强化学习等。
- 与 OpenNE 不同,PyTorch 是一个通用的深度学习框架,可以用于各种深度学习模型的构建和训练。
总的来说:OpenNE 专注于网络嵌入,而 PyTorch 是一个通用的深度学习框架。
下面是几种生成节点嵌入向量的算法示例(前三种基于 OpenNE,Node2vec 不依赖 OpenNE):
1.GraRep:
代码如下:
import random
import pandas as pd
import networkx as nx
from src import openne
from src.openne import graph
from src.openne.graph import Graph
from src.openne.grarep import GraRep
# GraRep example: sample a random undirected graph, fill in the attributes of
# a Graph object by hand, then build a GraRep model on it and save the result.
node_count = 150

# Draw 200 random endpoint pairs. Self-loops are dropped, so the edge list can
# end up shorter than 200; repeated pairs are kept as drawn.
sampled_pairs = (
    (random.randint(0, node_count - 1), random.randint(0, node_count - 1))
    for _ in range(200)
)
edges = [(node1, node2) for node1, node2 in sampled_pairs if node1 != node2]

# The Graph object is populated attribute by attribute rather than loaded
# from a file.
G = Graph()
G.edge_list = edges

# Only nodes that occur in at least one edge belong to the graph.
nodes = {endpoint for edge in G.edge_list for endpoint in edge}
G.node_size = len(nodes)
G.nodes = list(nodes)

# node -> index, and the inverse index -> node table (same order as G.nodes).
G.look_up_dict = {node: idx for idx, node in enumerate(G.nodes)}
G.look_back_list = list(G.nodes)

# Undirected adjacency lists: each edge is recorded from both endpoints.
G.G = {}
for u, v in edges:
    G.G.setdefault(u, []).append(v)
    G.G.setdefault(v, []).append(u)

model = GraRep(G, Kstep=2, dim=128)
# NOTE(review): an explicit model.learn_embedding() call was left disabled at
# this point; presumably the constructor already trains the model -- confirm.
# NOTE(review): `embedding` holds whatever save_embeddings() returns; verify
# that it is the embedding data and not None.
embedding = model.save_embeddings("/home/yin/DREAMwalk-main/DREAMwalk-main/embedding_compare/Data/grarep_embeddings.pkl")
2.HOPE:
import random
import pickle
import pandas as pd
import networkx as nx
from src import openne
from src.openne import line
from src.openne import graph
from src.openne.graph import Graph
from src.openne.hope import HOPE
# HOPE example: sample a random undirected graph with networkx, wrap it in a
# small adapter object, and read the node vectors from a HOPE model.
node_count = 150

# Draw 200 random endpoint pairs. Self-loops are dropped, so the edge list can
# end up shorter than 200.
sampled_pairs = (
    (random.randint(0, node_count - 1), random.randint(0, node_count - 1))
    for _ in range(200)
)
edges = [(node1, node2) for node1, node2 in sampled_pairs if node1 != node2]


class Graph():
    """Thin adapter around a networkx graph.

    Exposes the four attributes this example hands to the model: ``G``,
    ``look_up_dict``, ``look_back_list`` and ``node_size``. Because it is
    defined here, it rebinds any ``Graph`` name that was already in scope.
    """

    def __init__(self, G_nx):
        # Snapshot the node order once so both index maps agree with it.
        node_order = list(G_nx.nodes())
        self.G = G_nx
        # node -> index
        self.look_up_dict = {node: idx for idx, node in enumerate(node_order)}
        # index -> node (a dict, despite the "list" in the attribute name)
        self.look_back_list = dict(enumerate(node_order))
        self.node_size = G_nx.number_of_nodes()


G_nx = nx.Graph()
G_nx.add_edges_from(edges)
G = Graph(G_nx)

# Initialise the HOPE model with a 128-dimensional target, then read the
# resulting vectors from it.
hope_model = HOPE(G, 128)
embeddings = hope_model.vectors
3.LINE:
import random
import pickle
import pandas as pd
import networkx as nx
from src import openne
from src.openne import line
from src.openne import graph
from src.openne.graph import Graph
from src.openne.line import _LINE
# LINE example: sample a random weighted undirected graph with networkx, wrap
# it in a small adapter object, and train a LINE model for a few epochs.
node_count = 150

# Draw 200 random endpoint pairs. Self-loops are dropped, so the edge list can
# end up shorter than 200.
sampled_pairs = (
    (random.randint(0, node_count - 1), random.randint(0, node_count - 1))
    for _ in range(200)
)
edges = [(node1, node2) for node1, node2 in sampled_pairs if node1 != node2]

G_nx = nx.Graph()
G_nx.add_edges_from(edges)

# Give every edge a weight (all weights are assumed to be 1.0).
nx.set_edge_attributes(G_nx, 1.0, 'weight')

# Standalone node -> index mapping. The adapter below builds its own copy, and
# nothing else in this snippet reads this one.
look_up_dict = dict(zip(G_nx.nodes(), range(G_nx.number_of_nodes())))


class Graph():
    """Thin adapter around a networkx graph.

    Exposes the four attributes this example hands to the model: ``G``,
    ``look_up_dict``, ``look_back_list`` and ``node_size``. Because it is
    defined here, it rebinds any ``Graph`` name that was already in scope.
    """

    def __init__(self, G_nx):
        # Snapshot the node order once so both index maps agree with it.
        node_order = list(G_nx.nodes())
        self.G = G_nx  # the networkx graph itself
        # node -> index
        self.look_up_dict = {node: idx for idx, node in enumerate(node_order)}
        # index -> node (a dict, despite the "list" in the attribute name)
        self.look_back_list = dict(enumerate(node_order))
        self.node_size = G_nx.number_of_nodes()  # number of nodes


G = Graph(G_nx)

# Build the LINE model with its default settings.
line_model = _LINE(G)

# Train for several rounds (10 here); each call runs one round of training.
for _ in range(10):
    line_model.train_one_epoch()

# Fetch the trained node embeddings.
embeddings = line_model.get_embeddings()
4.Node2vec(不依赖于openne)
import random
import pickle
import pandas as pd
import networkx as nx
from node2vec import Node2Vec
# Node2vec example (no OpenNE involved): sample a random undirected graph,
# fit a Node2Vec model on it, and collect one vector per node.
node_count = 150

# Draw 200 random endpoint pairs. Self-loops are dropped, so the edge list can
# end up shorter than 200.
sampled_pairs = (
    (random.randint(0, node_count - 1), random.randint(0, node_count - 1))
    for _ in range(200)
)
edges = [(node1, node2) for node1, node2 in sampled_pairs if node1 != node2]

G = nx.Graph()
G.add_edges_from(edges)

# Random-walk configuration, followed by the fit step that produces the model.
node2vec = Node2Vec(G, dimensions=128, walk_length=10, num_walks=100, p=1, q=1, workers=4)
model = node2vec.fit(window=4, min_count=1, batch_words=4)

# Every node that appears in at least one edge.
unique_nodes = {endpoint for edge in edges for endpoint in edge}

# Vectors are looked up by the string form of the node id, while the result
# dict keeps the original integer ids as keys.
node_vectors = {node_id: model.wv[str(node_id)] for node_id in unique_nodes}
总结:Node2vec 通过受控的随机游走(由参数 p、q 控制)平衡局部和全局结构信息;模型将网络节点表示成向量后,节点的 1 到 k 阶关系全都综合反映在这一个向量中。GraRep 则将 1 到 k 阶共 k 种关系分开,分别形成 k 个向量,每个向量表示一种关系;例如上述 GraRep 代码中的 model = GraRep(G, Kstep=2, dim=128),Kstep=2 即表示分别建模 1 阶和 2 阶两种关系。LINE 强调一阶和二阶邻接关系,无法获取 k>2 的节点关系,但适合大规模网络。HOPE 注重保持高阶邻接关系,适用于有向网络。上述算法的复杂度排序为:GraRep > HOPE > Node2vec > LINE。