1. Environment
conda create -n langchain_rag python=3.10
conda activate langchain_rag  # activate the environment
pip install langchain langchain-ollama langchain-chroma  # install dependencies
pip install pandas
pip install ragas  # version 0.2.14
pip install langchain-community
2. Run the embedding model and the LLM on Ollama. I used quentinz/bge-large-zh-v1.5 and qwen2.5:7b.
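If the two models are not already available locally, they can be pulled first with Ollama's standard pull command (the model tags are the ones named above; ollama list can be used afterwards to confirm they are installed):

ollama pull quentinz/bge-large-zh-v1.5
ollama pull qwen2.5:7b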
3. Code:
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
import os
import pandas as pd
# ✅ RAGAS-related imports
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from datasets import Dataset
# Initialize the LLM and the embedding model
model = OllamaLLM(model="qwen2.5:7b")
embeddings = OllamaEmbeddings(model="quentinz/bge-large-zh-v1.5")
# Wrap them in RAGAS-compatible adapters
ragas_llm = LangchainLLMWrapper(model)
ragas_embedding = LangchainEmbeddingsWrapper(embeddings)
# Vector DB path and CSV loading
db_location = "./chroma_langchain_db"
df = pd.read_csv("dataset.csv")
add_documents = not os.path.exists(db_location)
# Build documents (only on the first run, when the vector store does not exist yet)
if add_documents:
    documents = []
    ids = []
    for i, row in df.iterrows():
        doc = Document(
            page_content=row["问题"] + " " + row["答案"],
            metadata={"答案字数": row["答案字数"]},
            id=str(row["id"])
        )
        documents.append(doc)
        ids.append(str(row["id"]))
# Vector store
vector_store = Chroma(
    collection_name="dataset",
    persist_directory=db_location,
    embedding_function=embeddings
)
if add_documents:
    vector_store.add_documents(documents=documents, ids=ids)
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
# Prompt template and QA chain
template = """
你是一位纺织品和服装方面的专家。
以下是一些相关资料:{reviews}
请根据这些资料,回答用户的问题:{question}
"""
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | model
# Interactive input + RAGAS evaluation
while True:
    print("\n🧵 请输入问题 (输入 q 退出):")
    question = input("👉 ")
    if question.lower() == "q":
        break
    # Retrieve relevant documents
    docs = retriever.invoke(question)
    context_docs = [doc.page_content for doc in docs]
    context_str = "\n".join(context_docs)
    print("\n🔍 检索到的内容:")
    for i, doc in enumerate(docs):
        print(f"[文档 {i+1}]: {doc.page_content}")
    # Generate the answer
    result = chain.invoke({"reviews": context_str, "question": question})
    print("\n🤖 回答:")
    print(result)
    # ✅ Build the RAGAS sample (reference-free metrics, no ground truth required)
    sample = {
        "question": question,
        "contexts": context_docs,
        "answer": result
    }
    print("\n🧪 正在使用 RAGAS 进行评估...")
    try:
        dataset = Dataset.from_list([sample])
        eval_result = evaluate(
            dataset=dataset,
            metrics=[faithfulness, answer_relevancy],
            llm=ragas_llm,
            embeddings=ragas_embedding
        )
        print("\n📊 RAGAS 评估结果:")
        print(eval_result)
    except Exception as e:
        print("\n❌ 评估出错:", e)
The knowledge base I used is a .csv file (see the sketch of its layout below).
Each document's id must be unique, and the file must be UTF-8 encoded.
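A minimal sketch of the expected layout: the header uses exactly the column names the code reads (id, 问题, 答案, 答案字数), while the two data rows are made-up placeholders, not rows from the actual dataset:

id,问题,答案,答案字数
1,什么是涤纶?,涤纶是一种合成纤维,常用于服装面料。,18
2,棉织物有什么特点?,棉织物吸湿透气,穿着舒适。,13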
Output: