import os
from typing import Any, Generator, List, Set, Tuple

try:
    from langchain_ollama import OllamaEmbeddings  # prefer the newer langchain-ollama package
except ImportError:
    from langchain_community.embeddings import OllamaEmbeddings  # fall back to the legacy community import
from langchain_community.document_compressors import FlashrankRerank
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM

# Local cache directory for the Flashrank reranker models
path = "/mlkj/hinge/m220/anaconda3/envs/llm/llm_answer/myflashrank"
def query_vectors(query, db_dir, db_paths, k1, k2, k3):
"""
:param query: 用户问题
:param db_dir: 向量库目录位置
:param db_paths: 向量库文件列表
:param k1: 每个向量库初始检索数量
:param k2: 向量检索后保留的总数(去重后)
:param k3: 每个 reranker 模型返回的 top-k 数量
:return: 合并后的 (Document, score) 列表
"""
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
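    # Note: this embedding model must be the same one that was used when the FAISS stores were built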
all_results = []
    # Step 1: retrieve the top-k1 hits from each vector store
for db_path in db_paths:
try:
db = FAISS.load_local(
folder_path=os.path.join(db_dir, db_path),
embeddings=embeddings,
allow_dangerous_deserialization=True
)
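            # similarity_search_with_score returns (Document, score) pairs; with the default FAISS index the score is an L2 distance, so smaller means more similar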
docs_scores = db.similarity_search_with_score(query, k=k1)
            all_results.extend(docs_scores)
except Exception as e:
print(f"查询 #{db_path} 失败: {str(e)}")
    # Step 2: deduplicate by page content
seen_contents = set()
unique_results = []
for doc, score in all_results:
if doc.page_content not in seen_contents:
seen_contents.add(doc.page_content)
unique_results.append((doc, score))
    # Step 3: keep the top-k2 by vector score (smaller = more similar)
top_k_by_vector = sorted(unique_results, key=lambda x: x[1])[:k2]
candidate_docs = [doc for doc, _ in top_k_by_vector]
    # Step 4: rerank with two different models
    model1 = "ms-marco-MiniLM-L-12-v2"
    model2 = "ms-marco-MultiBERT-L-12"
    # Create the two Flashrank rerankers
    compressor1 = FlashrankRerank(model=model1, top_n=k3, cache_dir=path)
    compressor2 = FlashrankRerank(model=model2, top_n=k3, cache_dir=path)
    # Rerank the candidates with each model
reranked_docs_1 = compressor1.compress_documents(documents=candidate_docs, query=query)
reranked_docs_2 = compressor2.compress_documents(documents=candidate_docs, query=query)
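    # FlashrankRerank stores its score in each document's metadata under "relevance_score"; that field is used for the final sort and filtering later on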
    # Step 5: merge the two reranked lists (union) and deduplicate
final_docs: List[Document] = []
seen: Set[str] = set()
    # Map page content back to the original vector score
doc_to_score = {doc.page_content: score for doc, score in top_k_by_vector}
for doc_list in [reranked_docs_1, reranked_docs_2]:
for doc in doc_list:
content = doc.page_content
if content not in seen:
seen.add(content)
final_docs.append(doc)
    # Rebuild the results, keeping the original vector scores
final_results = [
(doc, doc_to_score[doc.page_content])
for doc in final_docs
if doc.page_content in doc_to_score
]
return final_results
def get_related_content(related_docs_with_scores):
"""
对文档内容进行拼接
:param related_docs_with_scores: 包含分数和文档的元组列表 [(Document, score), ...]
:return: 拼接后的内容字符串
"""
related_content = []
for doc, score in related_docs_with_scores:
related_content.append(doc.page_content.replace("\n\n", "\n"))
return "\n".join(related_content)
def define_prompt(docs, question):
"""
接收问题,返回基于文档内容的提示模版
:return:
"""
related_content = get_related_content(docs)
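    # Prompt template (kept in Chinese): answer from the knowledge base first, fall back to widely accepted general knowledge with an explicit disclaimer, and decline otherwise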
PROMPT_TEMPLATE = """
你是一个专业的知识库问答AI助手,请严格按照以下规则回答问题:
【回答规则】
1. 知识库优先原则
- 若上下文存在明确答案:
* 回答格式:
「根据知识库有关内容可知:[具体回答内容]」
* 流程类问题分步骤说明(1. 第一步... 2. 第二步...)
- 若上下文无答案但属于以下情况:
* 数学公式/物理定律等基础科学知识
* 广泛认可的常识(如国家首都等)
* 回答格式:
「在已知知识库中未找到相关信息,根据通用知识回答如下:[回答内容]」
- 完全无法回答时:
* 回答格式:
「抱歉,未能找到相关信息,无法做出回答」
2. 严格性要求
- 禁止对知识库内容进行推测或扩展
- 非知识库内容必须明确声明
【上下文】
{context}
【问题】
{question}
"""
prompt = PromptTemplate(
input_variables=["context", "question"],
template=PROMPT_TEMPLATE,
)
my_pmt = prompt.format(context=related_content, question=question)
return my_pmt
def get_stream_response(model_name: str, docs: List[Tuple[Any, float]], question: str) -> Generator[str, None, None]:
"""
流式回答生成器函数
:param model_name: 模型名称
:param docs: 检索到的文档列表
:param question: 用户问题
:return: 生成器,逐个返回回答的chunk
"""
try:
        # 1. Initialise the model (streaming enabled)
        model = OllamaLLM(model=model_name, stream=True, temperature=0)
        # 2. Build the prompt
        my_pmt = define_prompt(docs, question)
        print(f'Prompt length: {len(my_pmt)} characters')
        print(f'Prompt: {my_pmt}')
        # 3. Get the streaming response via the stream() method
for chunk in model.stream(my_pmt):
yield chunk
except Exception as e:
error_msg = f"流式回答过程发生错误:{str(e)}"
print(error_msg)
yield error_msg
def process_answer_with_stream(model_name: str, query: str, db_dir: str, db_paths: List[str], k1: int, k2: int,
                               k3: int, final_k: int):
    """
    Run the Q&A pipeline and return a streaming response together with the referenced source files
    :return: (streaming generator, list of referenced files)
    """
try:
        # 1. Retrieve the candidate documents
        results = query_vectors(query, db_dir, db_paths, k1, k2, k3)
        # 2. Sort and filter here:
        #    first by vector distance ascending (smaller = more similar), then by Flashrank relevance score descending (larger = more relevant)
        sorted_results = sorted(
            results,
            key=lambda x: (
                x[1],  # vector distance score (ascending)
                -x[0].metadata.get("relevance_score", 0)  # Flashrank score (negated for descending order)
            )
        )
        # 3. Keep the top final_k results
        final_results = sorted_results[:final_k]
        # 4. Filter and inspect: keep only documents with a Flashrank relevance score > 0.4
        filtered_results = []
        for result in final_results:
            doc, vector_score = result
            relevance_score = doc.metadata.get("relevance_score", 0)
            # Filter condition: Flashrank relevance score > 0.4
            if relevance_score > 0.4:
                filtered_results.append(result)
            # print(f'Text: {doc.page_content}')
            # print(f'Vector distance score: {vector_score} (smaller = more similar)')
            # print(f'Relevance score: {relevance_score} (larger = more relevant)')
            # print('________________________' * 3)
        # print(f"Number of raw retrieved documents: {len(results)}")
        # print(f"Number kept after sorting and truncation: {len(final_results)}")
        # print(f"Number left after filtering: {len(filtered_results)}")
filtered_results = list(reversed(filtered_results))
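        # Collect the source file names of every retrieved document (not only the filtered ones) for citation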
use_files = set()
for doc, _ in results:
if hasattr(doc, 'metadata') and 'source' in doc.metadata:
use_files.add(doc.metadata['source'])
        # 5. Create the streaming response generator
stream_generator = get_stream_response(model_name, filtered_results, query)
return stream_generator, list(use_files)
except Exception as e:
error_msg = f"处理过程发生错误:{str(e)}"
print(error_msg)
def error_generator():
yield error_msg
return error_generator(), []
# The original answer() function is kept for compatibility, but now delegates to the streaming version
def answer(model_name, query, db_dir, db_paths, k1, k2, k3, final_k):
    """
    Keeps the original interface, but returns a streaming generator instead
    """
    return process_answer_with_stream(model_name, query, db_dir, db_paths, k1, k2, k3, final_k)
# Usage example
if __name__ == "__main__":
    # Example parameters
k1 = 15
k2 = 40
k3 = 15
final_k = 18
db_dir = "Faiss"
db_paths = os.listdir(db_dir)
    print('Vector store files:', db_paths)
query = "教育培训经费中列支的项目包括哪些"
model_name = "qwen2.5:7b"
    # Get the streaming response
    stream_generator, use_files = answer(model_name, query, db_dir, db_paths, k1, k2, k3, final_k)
    print("Referenced files:", use_files)
    print("Answer:")
    # Print the streaming response chunk by chunk
full_response = ""
for chunk in stream_generator:
print(chunk, end="", flush=True)
full_response += chunk
So, does this sorting and reranking workflow still apply?