Continuing from the previous post, 《AI大模型开发笔记》企业RAG技术实战(一): https://mp.youkuaiyun.com/mp_blog/creation/editor/146381354
A LlamaIndex example
https://docs.llamaindex.ai/en/stable/api_reference/
Environment setup
We keep using the Python virtual environment from the earlier LangChain example; there is no need to create a new one, just activate it.
Environment configuration for different LLMs
# Create a Python 3.10 virtual environment with conda
# conda create -n llmrag python=3.10
# Activate the conda environment named llmrag
conda activate llmrag
# Install torch
conda install pytorch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 pytorch-cuda=12.1 -c pytorch -c nvidia
# Install dependencies
pip install llama_hub llama_index llama-index-readers-web trafilatura
pip install llama-index-vector-stores-chroma  # Chroma vector store support
pip install llama-index-embeddings-huggingface
# If you use OpenAI models
pip install openai
# If you run Ollama locally
pip install llama-index-llms-ollama
# For the online Tongyi Qianwen (DashScope) models
pip install llama-index-llms-dashscope
API key setup
Set the API key environment variable the same way as in the LangChain project, i.e. put it in the .env file:
DASHSCOPE_API_KEY="YOUR_DASHSCOPE_API_KEY"
import dotenv
dotenv.load_dotenv()
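Once dotenv has loaded the key, downstream integrations such as the DashScope LLM can read it from the environment. A minimal sanity-check sketch (the assert is only illustrative, not required by LlamaIndex):
import os
import dotenv

dotenv.load_dotenv()  # reads DASHSCOPE_API_KEY from the .env file

# Check that the key is visible to the process; the DashScope LLM used later
# reads it from this environment variable.
assert os.getenv("DASHSCOPE_API_KEY"), "DASHSCOPE_API_KEY is not set"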
Installing Chroma
Install Chroma, a lightweight vector database. It is lightweight and runs natively on Windows, so neither WSL nor Docker is needed.
pip install chromadb  # install
chroma run  # run the server
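The complete code below uses an in-process EphemeralClient, so running the server is optional. If you do start a server with chroma run, you can connect to it instead; this sketch assumes the server's default host and port:
import chromadb

# Connect to a server started with `chroma run` (defaults to localhost:8000)
chroma_client = chromadb.HttpClient(host="localhost", port=8000)
collection = chroma_client.get_or_create_collection("quickstart")
print(collection.count())  # number of stored embeddings; 0 for a fresh collection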
Preparing the data
In this example and the ones that follow, the corpus is the Baidu Baike entry on AIGC:
https://baike.baidu.com/item/AIGC?fromModule=lemma_search-box
Complete code
from llama_index.readers.web import TrafilaturaWebReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.schema import IndexNode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, StorageContext
#from llama_index.llms.openai import OpenAI
#from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
def prepare_data():
    url = "https://baike.baidu.com/item/AIGC?fromModule=lemma_search-box"
    docs = TrafilaturaWebReader().load_data([url])
    return docs
# Embed the documents and store them in the vector database
def embedding_data(docs):
    # Vector database client
    chroma_client = chromadb.EphemeralClient()
    chroma_collection = chroma_client.create_collection("quickstart")
    # Vector store; persist_dir specifies the storage location
    vector_store = ChromaVectorStore(
        chroma_collection=chroma_collection, persist_dir="./chroma_langchain_db"
    )
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # Create the document splitter
    node_parser = SimpleNodeParser.from_defaults(chunk_size=500, chunk_overlap=50)
    # Create the BAAI embedding model
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-zh-v1.5")
    # Create the index
    base_index = VectorStoreIndex.from_documents(
        documents=docs,
        transformations=[node_parser],
        storage_context=storage_context,
        embed_model=embed_model,
    )
    return base_index, embed_model
def get_llm():
    # OpenAI LLM
    # llm = OpenAI(model="gpt-3.5-turbo")
    # Tongyi Qianwen (DashScope, online)
    '''
    from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
    llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX)
    '''
    # Local model served by Ollama
    llm = Ollama(model="qwen2:7b-instruct-q4_0", request_timeout=120.0)
    # Google Gemini LLM
    # llm = Gemini()
    return llm
def retrieve_data(question):
    # Create the retriever
    base_retriever = base_index.as_retriever(similarity_top_k=2)
    # Retrieve relevant documents
    retrievals = base_retriever.retrieve(question)
    print(retrievals)
    # https://docs.llamaindex.ai/en/stable/examples/low_level/response_synthesis/
    from llama_index.core.response.notebook_utils import display_source_node
    for n in retrievals:
        display_source_node(n, source_length=1500)
    return retrievals
def generate_answer(question):
    query_engine = base_index.as_query_engine()
    # The LLM's answer
    response = query_engine.query(question)
    print(str(response))

question = "艾伦•图灵的论文叫什么"
docs = prepare_data()
llm = get_llm()
base_index, embed_model = embedding_data(docs)
# Configure the LLM, embedding model, etc. via Settings
Settings.llm = llm
Settings.embed_model = embed_model
# Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
Settings.num_output = 512
Settings.context_window = 3000
retrieve_data(question)
generate_answer(question)
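The example above uses chromadb.EphemeralClient, so the vectors disappear when the process exits. Below is a hedged sketch of how the same index could be persisted and reloaded with a PersistentClient; the ./chroma_db path and the collection name are arbitrary choices for illustration, and docs / the embedding model are the same as in the complete code.
import chromadb
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-zh-v1.5")

# First run: build the index and write the vectors to disk
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection("aigc")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    docs, storage_context=storage_context, embed_model=embed_model
)

# Later runs: rebuild the index object directly from the stored vectors
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)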
Modular RAG
Paper: Modular RAG: Transforming RAG Systems into LEGO-like Reconfigurable Frameworks: https://arxiv.org/pdf/2407.21059
Inference stage
NetEase's open-source QAnything
For example, NetEase's open-source QAnything: https://github.com/netease-youdao/QAnything
The retrieval component used by QAnything, BCEmbedding (https://github.com/netease-youdao/BCEmbedding), has very strong bilingual and cross-lingual capabilities.
The combination of bce-embedding-base_v1 and bce-reranker-base_v1 achieves SOTA results.
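A hedged sketch of wiring these two bce models into the LlamaIndex pipeline from the complete code above. The model names come from the BCEmbedding repo, but routing them through HuggingFaceEmbedding and SentenceTransformerRerank is my own assumption here, not QAnything's official integration.
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# bce embedding model for first-stage dense retrieval
Settings.embed_model = HuggingFaceEmbedding(model_name="maidalun1020/bce-embedding-base_v1")
# bce reranker used as a cross-encoder post-processor
reranker = SentenceTransformerRerank(model="maidalun1020/bce-reranker-base_v1", top_n=2)

index = VectorStoreIndex.from_documents(docs)  # docs prepared as in the earlier example
query_engine = index.as_query_engine(
    similarity_top_k=10,             # retrieve broadly with the embedding model
    node_postprocessors=[reranker],  # then keep only the top 2 reranked nodes
)
print(query_engine.query("AIGC是什么"))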
Rewrite-Retrieve-Read (RRR)
Rewrite-Retrieve-Read (RRR) is another typical sequential structure (https://arxiv.org/pdf/2305.14283.pdf).
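As a rough illustration only: the RRR paper trains a dedicated query rewriter, but the sequential rewrite → retrieve → read flow can be sketched with Settings.llm and base_index from the complete code above. The prompts below are my own hypothetical ones.
from llama_index.core import Settings

def rewrite_retrieve_read(question: str) -> str:
    # 1) Rewrite: ask the LLM to turn the question into a better search query
    rewrite_prompt = f"Rewrite the following question as a concise search query:\n{question}"
    search_query = Settings.llm.complete(rewrite_prompt).text.strip()

    # 2) Retrieve: run the rewritten query against the existing index
    nodes = base_index.as_retriever(similarity_top_k=2).retrieve(search_query)
    context = "\n".join(n.get_content() for n in nodes)

    # 3) Read: answer the original question from the retrieved context
    answer_prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    return Settings.llm.complete(answer_prompt).text

print(rewrite_retrieve_read("艾伦•图灵的论文叫什么"))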
Conditional pattern
A classic implementation of conditional RAG is the semantic-router project: https://github.com/aurelio-labs/semantic-router
Example implementation with LlamaIndex:
# Define query engines and tools
# (summary_index and vector_index are assumed to have been built from the same documents beforehand)
from llama_index.core.tools import QueryEngineTool

list_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", use_async=True
)
vector_query_engine = vector_index.as_query_engine(
    response_mode="tree_summarize", use_async=True
)

# description is QueryEngineTool metadata describing what the tool is for;
# it helps the ToolRetrieverRouterQueryEngine decide which tool to route a query to.
list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description="Useful for questions asking for a biography of the author.",
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific snippets from the author's life, like"
        " his time in college, his time in YC, or more."
    ),
)

# Define the router query engine
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

# Route between these two tools
obj_index = ObjectIndex.from_objects(
    [list_tool, vector_tool],
    index_cls=VectorStoreIndex,
)

from llama_index.core.query_engine import ToolRetrieverRouterQueryEngine

query_engine = ToolRetrieverRouterQueryEngine(obj_index.as_retriever())

# The query is compared against each tool's description by similarity
response = query_engine.query("What is a biography of the author's life?")
print(str(response))
Iterative retrieval
A typical example of iterative retrieval is ITER-RETGEN: http
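A minimal sketch of the iterative retrieve-then-generate idea, reusing base_index and Settings.llm from the earlier example; this is an illustrative simplification, not the exact algorithm from the ITER-RETGEN paper.
from llama_index.core import Settings

def iter_retgen(question: str, rounds: int = 2) -> str:
    retriever = base_index.as_retriever(similarity_top_k=2)
    answer = ""
    for _ in range(rounds):
        # Each round retrieves with the question plus the previous draft answer,
        # so later rounds can pull in evidence the first retrieval missed.
        nodes = retriever.retrieve((question + " " + answer).strip())
        context = "\n".join(n.get_content() for n in nodes)
        prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
        answer = Settings.llm.complete(prompt).text
    return answer

print(iter_retgen("艾伦•图灵的论文叫什么"))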