前言:近期在引入阿里的 TuGraph 图数据库,需要将原 Neo4j 中的数据导入到新的 TuGraph 数据库中。原本打算通过 CSV 文件导出再导入,但因为格式和数据库设计上的差异,操作起来比较麻烦(也可能是我没有发现其他更方便的办法),因此写了一个 Python 脚本来完成数据导入。
使用:python3,TuGraph 4.5.1
遇到的问题:tugraph 的节点需要一个主键,这个只能自行指定。
支持:指定要导入的节点和边;自动创建不存在的节点/边;节点批量导入,边逐条导入(试过批量导入边,但 TuGraph 好像不支持;官网文档中的 CALL db.upsertEdge 在我使用的版本中也还没有实现)。
导入图示:

代码文件
# import time
import json
from typing import Dict, List, cast
# Connector class: holds connection defaults plus the import configuration
# (which node/edge labels to import and the primary-key property per node label).
# NOTE(review): indentation was lost in this copy of the file; the statements
# below are the class body and must be re-indented before the file can run.
class GraphConnector():
# Default database type; overwritten on the class by from_uri_db().
db_type: str = "tugraph"
# URL scheme used by from_uri_db() when building the connection URL.
driver: str = "bolt"
dialect: str = "cypher"
# NOTE(review): not referenced in the visible part of the file; presumably the
# number of records sent per batch - confirm against the import loop.
batch_size: int = 100
# Primary-key property for each node label.
node_pro_key: dict = dict({
'Ren':'zjhm','Aj':'ajbh','Che':'rowkey','Hh':'rowkey','Sj':'dhhm'})
# Edge (relationship) labels that should be imported.
specified_relation = ['ajgx','th','tfj','sysj','sycl']
# Node labels that should be imported.
specified_node = ['Ren','Aj','Che','Sj','Hh']
def __init__(self, driver, graph):
"""Initialize the connector with a Neo4j driver."""
self._driver = driver
self._schema = None
self._graph = graph
self._session = None
@classmethod
def from_uri_db(
    cls, host: str, port: int, user: str, pwd: str, db_name: str, db_type: str
) -> "GraphConnector":
    """Create a connector from host, port, user, pwd and db_name.

    Builds a ``<driver>://<host>:<port>`` URL from the class-level ``driver``
    scheme, opens a neo4j driver authenticated with ``(user, pwd)`` and
    verifies connectivity before returning a connector bound to ``db_name``.

    Note that ``db_type`` is stored on the class itself (``cls.db_type``),
    so it is shared by every connector created afterwards.

    Raises:
        ImportError: if the ``neo4j`` package is not installed.
        Exception: whatever the driver raises when the connection cannot
            be verified; the driver is closed before the error propagates.
    """
    try:
        from neo4j import GraphDatabase

        cls.db_type = db_type
        db_url = f"{cls.driver}://{host}:{port}"
        driver = GraphDatabase.driver(db_url, auth=(user, pwd))
        try:
            driver.verify_connectivity()
        except BaseException:
            # Do not leak the driver (and its connection pool) when the
            # server is unreachable or the credentials are rejected.
            driver.close()
            raise
        return cast(GraphConnector, cls(driver=driver, graph=db_name))
    except ImportError as err:
        raise ImportError(
            "neo4j package is not installed, please install it with "
            "`pip install neo4j`"
        ) from err
def create_graph_new(self, graph_name: str) -> bool:
    """Create the graph ``graph_name`` unless the server already lists it.

    Returns:
        ``True`` when the graph was missing and a create call was issued,
        ``False`` when a graph with that name was already listed.

    Raises:
        Exception: wraps any error raised while listing or creating graphs.
    """
    try:
        with self._driver.session() as session:
            listed = session.run("CALL dbms.graph.listGraphs()").data()
            already_present = False
            for entry in listed:
                if entry["graph_name"] == graph_name:
                    already_present = True
                    break
            if not already_present:
                create_stmt = f"CALL dbms.graph.createGraph('{graph_name}', '', 2048)"
                session.run(create_stmt)
    except Exception as e:
        raise Exception(f"Failed to create graph '{graph_name}': {e!s}") from e
    return not already_present
def create_vertex_labels(self, json_data):
    """Run ``db.createVertexLabelByJson`` on the connector's graph.

    ``json_data`` is passed to the procedure as the ``$json_data`` query
    parameter.

    Raises:
        Exception: wraps any error raised while opening the session or
            running the procedure.
    """
    statement = "CALL db.createVertexLabelByJson($json_data)"
    try:
        with self._driver.session(database=self._graph) as session:
            session.run(statement, json_data=json_data)
    except Exception as err:
        raise Exception("Failed to create vertex_labels ") from err
# Batch upsert of nodes: insert the ones that do not exist, update the rest.
def batch_update_node(self, json_data):
    """Run ``db.upsertVertex`` on the connector's graph.

    ``json_data`` is passed to the procedure as the ``$json_data`` query
    parameter.

    NOTE(review): TuGraph's documentation appears to describe
    ``db.upsertVertex(label_name, list_data)`` with two arguments, while
    this call passes a single parameter - confirm against the server version
    in use.

    Raises:
        Exception: wraps any error raised while opening the session or
            running the procedure.
    """
    try:
        with self._driver.session(database=self._graph) as session:
            # The session object only exposes run(); the previous
            # session.upsertVertex(...) call always raised AttributeError.
            session.run(
                "CALL db.upsertVertex($json_data)",
                json_data=json_data,
            )
    except Exception as e:
        raise Exception("Failed to upsert vertices") from e
# 批量更新关系,没有就新增,有就更新
def batch_update_edge(self, json_data):
try:
with self._driver.session(database=self._gra

最低0.47元/天 解锁文章

被折叠的 条评论
为什么被折叠?



