一、入库文档,问答查询
1. 数据读取
from langchain_core.documents import Document
import random
# Path of the knowledge-base text file to load.
file_name = "knowledge/熬夜救星护肤霜.txt"

# Read the whole file as UTF-8 text; the context manager closes the handle
# as soon as the read finishes.
with open(file_name, "r", encoding="utf-8") as source_file:
    data = source_file.read()
def get_role():
    """Return ``"admin"`` or ``"user"`` at random.

    A single ``random.random()`` draw decides the role: a value of 0.5 or
    above yields ``"admin"``, anything lower yields ``"user"``.
    """
    return "admin" if random.random() >= 0.5 else "user"
# Split the raw text on the "###" marker, trim surrounding whitespace from
# each piece, and keep only the pieces that still contain text.
trimmed_pieces = (piece.strip() for piece in data.split("###"))
chunks = [piece for piece in trimmed_pieces if piece]

# Wrap every segment in a Document whose metadata carries a randomly chosen
# role, the source file name, and a 1-based segment label.
documents = []
for position, segment_text in enumerate(chunks, 1):
    print(segment_text)
    segment_metadata = {
        "role": get_role(),
        "file_name": "熬夜救星护肤霜.txt",
        "segment": f"第{position}段",
    }
    documents.append(
        Document(page_content=segment_text, metadata=segment_metadata)
    )

# Bare expression with no effect in a plain script; presumably kept so an
# interactive session echoes the number of documents built.
len(documents)
2. 数据入库
from chromadb import HttpClient
from langchain_chroma import Chroma
from models import get_embed
# Connect to the Chroma server on localhost:8000.
client = HttpClient(port=8000, host="localhost")

# Embedding function supplied by the project-local `models` module.
embed = get_embed()

# Vector store bound to that client; no collection name is given, so the
# library's default collection is used.
db = Chroma(client=client, embedding_function=embed)

# Fetch the current contents of the store; the result is discarded here and
# is presumably only useful when echoed by an interactive session.
db.get()

# Insert the prepared documents.
# NOTE(review): no explicit ids are passed, so re-running this step likely
# stores the same segments again — confirm whether that is intended.
db.add_documents(documents)
3. 读取查询
# Ask the vector store for up to four segments relevant to the question,
# using score_threshold=0.7 to discard low-relevance hits.
results = db.similarity_search_with_relevance_scores(
    query="熬夜救星护肤霜是谁研发的?",
    k=4,
    score_threshold=0.7,
)

# Keep only the hits tagged with the "user" role. `.get` is used so that a
# hit whose metadata has no "role" entry is skipped instead of raising
# KeyError. The relevance score is not needed beyond the search itself.
# NOTE(review): the role filter runs after the top-k search, so fewer than
# k "user" documents may remain — confirm this is the desired behaviour.
my_docs = [
    doc for doc, _score in results if doc.metadata.get("role") == "user"
]

# Plain-text content of the retained documents, in the same order.
my_context = [doc.page_content for doc in my_docs]