Day30下 - RAG系统

最新推荐文章于 2025-11-26 14:12:29 发布

原创

最新推荐文章于 2025-11-26 14:12:29 发布 · 1k 阅读

14 ·

CC 4.0 BY-SA版权

文章标签：

#数据库 #RAG #深度学习 #人工智能

一、入库文档，问答查询

1. 数据读取

from langchain_core.documents import Document
import random

# Read the entire knowledge file into memory as a single UTF-8 string.
file_name = "knowledge/熬夜救星护肤霜.txt"
with open(file_name, "r", encoding="utf-8") as f:
    data = f.read()


# Randomly generate a role label.
def get_role():
    """Return either "admin" or "user", chosen by one random draw.

    A single call to ``random.random()`` decides the result: a value of
    0.5 or higher gives "admin", anything lower gives "user".
    """
    return "admin" if random.random() >= 0.5 else "user"


# Split the raw text on the "###" delimiter, trim each piece, and drop
# pieces that are empty after trimming.
chunks = [piece for piece in map(str.strip, data.split("###")) if piece]

# Wrap every chunk in a Document carrying a randomly assigned role, the
# source file name, and a 1-based segment label.
documents = []
for idx, chunk in enumerate(chunks, start=1):
    print(chunk)
    meta = {
        "role": get_role(),
        "file_name": "熬夜救星护肤霜.txt",
        "segment": f"第{idx}段",
    }
    doc = Document(page_content=chunk, metadata=meta)
    documents.append(doc)


len(documents)

2. 数据入库

from chromadb import HttpClient
from langchain_chroma import Chroma
from models import get_embed

# Create an HTTP client for the Chroma server on localhost:8000 and wrap it
# in a LangChain vector store that uses the project's embedding model.
client = HttpClient(port=8000, host="localhost")
embed = get_embed()
db = Chroma(client=client, embedding_function=embed)

# Fetch the current store contents (the result is not kept), then add the
# prepared documents.
db.get()
db.add_documents(documents)

3. 读取查询

# Run the similarity search, asking for at most 4 hits and passing a
# score_threshold of 0.7 to the store.
results = db.similarity_search_with_relevance_scores(
    query="熬夜救星护肤霜是谁研发的？",
    k=4,
    score_threshold=0.7,
)

# Keep only the hits whose metadata role is "user", then collect their text
# to use as the answer context.
my_docs = [hit for hit, _score in results if hit.metadata["role"] == "user"]
my_context = [hit.page_content for hit in my_docs]