一、建立基本的知识图谱并导入neo4j
这里举例用到两张表:属性表(每行对应一个节点及其属性)和关系表(每行对应一条从 start 指向 end 的关系),大致格式如下:
id | 名字 | 颜色 |
---|---|---|
a1 | 苹果 | 红色 |

start | rel | end |
---|---|---|
a1 | 属于 | b1 |
启动 Neo4j(关于 Neo4j 的安装此处不再赘述),然后运行下面的 Python 脚本导入数据:
import pandas as pd
from py2neo import Graph, Node, Relationship
graph = Graph("bolt://localhost:7687", auth=("neo4j", "xxxxxx"))  # Bolt connection to the local Neo4j instance; replace "xxxxxx" with your own password
# 导入属性表
def create_nodes(file_name, label, tx):
    """Merge one node per row of an attribute CSV into the given transaction.

    The file is read from ``./graph_data/``. Rows without an ``id`` are
    skipped. Every other non-empty column becomes a node property, and nodes
    are merged on ``(label, id)`` so re-running the import does not create
    duplicates.

    Args:
        file_name: Name of the CSV file inside ``./graph_data/``.
        label: Label assigned to every node created from this file.
        tx: Open py2neo transaction that receives the merges; the caller is
            responsible for committing or rolling it back.
    """
    table = pd.read_csv("./graph_data/" + file_name)
    table = table.dropna(subset=['id'])
    for _, row in table.iterrows():
        # pandas reads empty cells as NaN; drop them so a missing value is an
        # absent property rather than a NaN float stored on the node.
        properties = row.drop('id').dropna().to_dict()
        node = Node(label, id=row['id'], **properties)
        tx.merge(node, label, "id")
# Attribute CSV file -> label given to every node created from that file.
attribute_tables = {
    "attributes_a.csv": "a",
    "attributes_b.csv": "b",
}
# Relationship CSV file -> relationship type created for every row of that file.
relationship_types = {
    "rel_a_b.csv": "属于",
}

tx = graph.begin()
try:
    # Phase 1: merge all nodes and commit them. The lookups below go through
    # graph.nodes.match(), which runs outside this transaction and therefore
    # cannot see nodes that are still uncommitted; without this commit every
    # lookup on a fresh database comes back empty.
    for file_name, label in attribute_tables.items():
        create_nodes(file_name, label, tx)
    tx.commit()

    # Phase 2: create the relationships in a second transaction.
    # Trade-off: if this phase fails, the nodes from phase 1 stay committed.
    # Both phases use merge, so simply re-running the script is safe.
    tx = graph.begin()
    for file_name, rel_type in relationship_types.items():
        # File names follow "rel_<start label>_<end label>.csv"; adapt this
        # parsing to your own naming scheme. It is the same for every row,
        # so it is computed once per file.
        name_parts = file_name.split("_")
        start_label, end_label = name_parts[1], name_parts[2].split(".")[0]
        df = pd.read_csv("./graph_data/" + file_name)
        for _, row in df.iterrows():
            start_node = graph.nodes.match(start_label, id=row['start']).first()
            end_node = graph.nodes.match(end_label, id=row['end']).first()
            if start_node and end_node:
                rel = Relationship(start_node, rel_type, end_node)
                tx.merge(rel)
            else:
                # Report and skip rows whose endpoints are missing instead of
                # aborting the whole import.
                print(f"无法找到节点: {row['start']} 或 {row['end']}")
    tx.commit()
    print("成功导入!")
except Exception as e:
    # Discard whatever the currently open transaction has not committed yet.
    tx.rollback()
    print(f"导入失败:{e}")
运行后在neo4j中的效果大概是这样: