1.加载本地ollama模型:
# Install the pinned LangChain packages first:
#   pip install langchain==0.2.16
#   pip install langchain_community==0.2.16
# Code block: load the local large language model.
from langchain_community.llms import Ollama
# `model` is the LLM handle for the Ollama model tagged "qwen1_5-4b-chat-q2_k".
# No base_url is passed here, so the client's default endpoint is used.
# NOTE(review): presumably the model must already be available in the local
# Ollama instance -- confirm.
model = Ollama(model="qwen1_5-4b-chat-q2_k")
2.加载并配置pg向量数据库:
# Install the PGVector integration (here via the Tsinghua PyPI mirror):
#   pip install -qU langchain-postgres -i https://pypi.tuna.tsinghua.edu.cn/simple
from langchain_postgres import PGVector
# NOTE(review): this binds the name PGVector a second time; one of the two
# imports is redundant.
from langchain_postgres.vectorstores import PGVector
# Database connection URL (driver, user:password, host:port, database name).
# NOTE(review): the password is hard-coded in this string -- consider loading
# it from the environment before sharing or committing this file.
# NOTE(review): the URL selects the psycopg2 driver; the langchain-postgres
# documentation shows "postgresql+psycopg://" (psycopg3) -- confirm that
# psycopg2 is installed and intended.
CONNECTION_STRING = "postgresql+psycopg2://postgres:qaz142434@192.168.159.130:5432/postgres"
# Name of the vector-store collection.
COLLECTION_NAME = "state_of_the_union_test"
# Build the index: embed `docs` with `embeddings` and store the result in the
# collection above.
# `embeddings` and `docs` are NOT defined in this section; they are created in
# section 4 and section 3 further down, so those sections must be executed
# before this call or it raises NameError.
vector = PGVector.from_documents(
embedding=embeddings,
documents=docs,
collection_name=COLLECTION_NAME,
connection=CONNECTION_STRING,
use_jsonb=True,
# NOTE(review): presumably deletes any existing collection with this name
# before inserting, so every run starts from an empty collection -- confirm
# against the langchain-postgres documentation.
pre_delete_collection=True,
)
3.切分网络资源为数据块:
import bs4
import os
# Works around the WebBaseLoader message "USER_AGENT environment variable not
# set, consider setting it to identify your requests".
# The variable is set before the loader import below; presumably the loader
# module reads it at import time -- keep this order.
os.environ['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Split the web resource into chunks.
# The loader fetches a single page and restricts HTML parsing to elements
# whose CSS class is "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
bs_kwargs=dict(
parse_only=bs4.SoupStrainer(
class_=("post-content", "post-title", "post-header")
)
),
)
# Download and parse the page (network access happens here).
document = loader.load()
# Splitter configured with chunk_size=1000 and chunk_overlap=200.
# NOTE(review): sizes are presumably measured in characters -- confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# `docs` is the list of chunks that section 2 passes to PGVector.from_documents.
docs = text_splitter.split_documents(document)
4.加载嵌入ollama模型:
# Install the Ollama integration (here via the Tsinghua PyPI mirror):
#   pip install langchain_ollama -i https://pypi.tuna.tsinghua.edu.cn/simple
from langchain_ollama import OllamaEmbeddings
# Embedding model "lrs33/bce-embedding-base_v1" served by the Ollama instance
# at http://localhost:11434/. `embeddings` is the object that section 2 passes
# to PGVector.from_documents as `embedding`.
embeddings = OllamaEmbeddings(model="lrs33/bce-embedding-base_v1",base_url="http://localhost:11434/")
5.输出(暂时搁置,之后会以评论形式贴上流式输出代码):