提供详细的python代码，基于微信公众号健康养生文章的知识库构建与问答系统

最新推荐文章于 2025-04-10 18:04:34 发布

程序员Thomas

最新推荐文章于 2025-04-10 18:04:34 发布

阅读量417

点赞数 3

分类专栏： python、deepseek、养生　文章标签： python、微信、开发语言

本文链接：https://blog.youkuaiyun.com/lbh73/article/details/146553380

版权

python 同时被 3 个专栏收录

28 篇文章

订阅专栏

deepseek

1 篇文章

订阅专栏

养生

1 篇文章

订阅专栏

基于微信公众号健康养生文章的知识库构建与问答系统

下面是一个基于您需求的Python源代码实现。这个程序会从指定的微信公众号文章中提取健康养生知识，构建知识库，并利用DeepSeek模型来回答用户的健康养生问题。

import os
import re
import json
import time
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import HumanMessage, AIMessage
from langchain import PromptTemplate
from langchain.llms import DeepSeek
import wechat_articles_scraper  # 假设有一个微信文章抓取库

# Configuration
# Names of the WeChat official accounts to scrape (placeholders; replace with
# the accounts you follow).
WECHAT_ACCOUNTS = ["健康养生公众号1", "健康养生公众号2"]
# Directory where scraped articles are stored as .txt files.
KNOWLEDGE_BASE_DIR = "knowledge_base"
# Model name handed to HuggingFaceEmbeddings when building the vector store.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# DeepSeek API key. The DEEPSEEK_API_KEY environment variable takes precedence
# so the secret does not have to be written into the source file; the original
# placeholder is kept as the fallback for backward compatibility.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "your_deepseek_api_key")

# Make sure the knowledge-base directory exists before anything reads or writes it.
os.makedirs(KNOWLEDGE_BASE_DIR, exist_ok=True)

def fetch_wechat_articles(accounts):
    """Collect articles from every account in *accounts*.

    Scraping is best-effort: an error raised while handling one account is
    printed and the loop moves on to the next account.

    Args:
        accounts: Iterable of account names passed to the scraper.

    Returns:
        A list holding the articles of all accounts that were scraped
        without error, in iteration order.
    """
    collected = []
    for name in accounts:
        try:
            # A fresh scraper is created per account (hypothetical scraping library).
            collected += wechat_articles_scraper.WeChatScraper().scrape_articles(name)
        except Exception as e:
            print(f"抓取微信公众号文章时出错: {e}")
    return collected

def save_articles_to_knowledge_base(articles):
    """Write each article to ``article_<index>.txt`` in the knowledge base.

    NOTE: file names are derived from the position in *articles* and files are
    opened in write mode, so a later call overwrites files with the same index.

    Args:
        articles: Iterable of mappings providing the ``title``, ``content``
            and ``url`` keys.

    Raises:
        KeyError: If an article lacks one of the required keys. The file for
            that article is left untouched in this case.
    """
    # Cheap guard in case the directory was removed after module import.
    os.makedirs(KNOWLEDGE_BASE_DIR, exist_ok=True)
    for idx, article in enumerate(articles):
        # Render the text BEFORE opening the file: opening in "w" mode
        # truncates immediately, so a missing key would otherwise destroy an
        # existing file and leave a partial one behind to be indexed later.
        text = (
            f"标题: {article['title']}\n"
            f"内容: {article['content']}\n"
            f"链接: {article['url']}\n"
        )
        file_path = os.path.join(KNOWLEDGE_BASE_DIR, f"article_{idx}.txt")
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)

def load_knowledge_base():
    """Load every ``.txt`` file in the knowledge-base directory.

    Returns:
        A list with the documents produced by ``TextLoader`` for each text
        file, in the order ``os.listdir`` yields the files. Entries without
        a ``.txt`` suffix are ignored.
    """
    loaded = []
    for entry in os.listdir(KNOWLEDGE_BASE_DIR):
        if not entry.endswith(".txt"):
            continue
        source = os.path.join(KNOWLEDGE_BASE_DIR, entry)
        loaded += TextLoader(source, encoding="utf-8").load()
    return loaded

def process_documents(documents):
    """Split *documents* into overlapping chunks.

    The splitter breaks on newlines and is configured with a chunk size of
    1000 and an overlap of 200, both measured with ``len``.

    Args:
        documents: Documents to hand to ``split_documents``.

    Returns:
        Whatever ``CharacterTextSplitter.split_documents`` returns for the input.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = CharacterTextSplitter(**splitter_options)
    chunks = splitter.split_documents(documents)
    return chunks

def create_vector_store(documents):
    """Build a FAISS vector store from *documents*.

    Args:
        documents: Documents to embed with the model named by ``EMBEDDING_MODEL``.

    Returns:
        The object returned by ``FAISS.from_documents``.
    """
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    store = FAISS.from_documents(documents, embedder)
    return store

def initialize_deepseek():
    """Create the DeepSeek model client using ``DEEPSEEK_API_KEY``.

    NOTE(review): this relies on ``langchain.llms`` exporting a ``DeepSeek``
    class that accepts ``api_key`` -- confirm this against the installed
    LangChain version.

    Returns:
        The constructed ``DeepSeek`` instance.
    """
    model = DeepSeek(api_key=DEEPSEEK_API_KEY)
    return model

def answer_question(question, vector_store, deepseek_model):
    """Answer *question* using chunks retrieved from *vector_store*.

    Args:
        question: The user's question text.
        vector_store: Object exposing ``similarity_search(query, k=...)``.
        deepseek_model: Object exposing ``predict(prompt_text)``.

    Returns:
        The value returned by ``deepseek_model.predict`` for the filled prompt.
    """
    # This literal is sent to the model verbatim, leading spaces included.
    prompt_layout = """您是一个健康养生专家，根据以下背景知识回答用户的问题：
    
    背景知识:
    {context}
    
    问题:
    {question}
    
    请基于背景知识提供专业、详细的回答。如果没有相关信息，请说明。"""

    # Retrieve the three closest chunks and merge their text into one context.
    hits = vector_store.similarity_search(question, k=3)
    background = "\n".join(hit.page_content for hit in hits)

    filled_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_layout,
    ).format(question=question, context=background)

    return deepseek_model.predict(filled_prompt)

def main():
    """Run the interactive health Q&A program.

    Scrapes the configured accounts, stores any fetched articles, loads and
    splits the knowledge base, builds the vector store, creates the model and
    then answers questions typed by the user until they enter ``exit`` or
    close the input stream (EOF / Ctrl-C).
    """
    print("健康养生问答系统初始化中...")

    # Step 1: scrape the configured accounts and store the articles.
    print("正在抓取微信公众号文章...")
    articles = fetch_wechat_articles(WECHAT_ACCOUNTS)
    if articles:
        save_articles_to_knowledge_base(articles)
        print(f"已抓取并保存 {len(articles)} 篇文章到知识库")
    else:
        print("没有抓取到新文章，将使用现有知识库")

    # Step 2: load the knowledge base and build the vector store.
    print("正在加载知识库...")
    documents = load_knowledge_base()
    if not documents:
        # With nothing scraped and nothing on disk there is nothing to index;
        # stop with a clear message instead of failing inside the vector
        # store construction.
        print("知识库为空，无法构建向量索引，请先添加文章后重试")
        return
    processed_docs = process_documents(documents)
    vector_store = create_vector_store(processed_docs)
    print("知识库加载完成")

    # Step 3: create the DeepSeek model.
    print("正在初始化DeepSeek模型...")
    deepseek_model = initialize_deepseek()
    print("模型初始化完成")

    # Step 4: interactive question loop.
    print("\n健康养生问答系统已就绪！")
    while True:
        try:
            # Strip so that " exit " and accidental padding behave as expected.
            question = input("\n请输入您的健康养生问题（输入'exit'退出）：").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave politely instead of dumping a traceback.
            print("\n感谢使用健康养生问答系统，再见！")
            break

        if not question:
            # Nothing to search for; prompt again rather than query the model.
            continue
        if question.lower() == "exit":
            print("感谢使用健康养生问答系统，再见！")
            break

        print("正在为您查找答案，请稍候...")
        try:
            answer = answer_question(question, vector_store, deepseek_model)
            print("\n回答：")
            print(answer)
        except Exception as e:
            # Keep the session alive when a single question fails.
            print(f"回答问题时出错: {e}")


if __name__ == "__main__":
    main()

代码说明

微信文章抓取：
- 使用假设的wechat_articles_scraper库来抓取指定微信公众号的文章
- 实际应用中，您需要替换为实际的微信文章抓取方法
知识库管理：
- 将抓取的文章保存为文本文件
- 支持从现有知识库加载文章
文档处理与向量化：
- 使用CharacterTextSplitter将文档分割为适合处理的块
- 使用HuggingFaceEmbeddings将文本嵌入向量空间
- 使用FAISS创建向量存储以便快速检索
DeepSeek集成：
- 初始化DeepSeek模型
- 构建包含上下文的提示模板
- 使用模型生成答案

使用说明

安装依赖库：

```
pip install langchain huggingface_hub faiss-cpu sentence-transformers deepseek requests beautifulsoup4 fake-useragent
```

替换代码中的your_deepseek_api_key为您实际的DeepSeek API密钥
运行程序：
```
python health_advisor.py
```
在程序提示时输入您的健康养生问题

注意：实际应用中，微信文章抓取部分需要根据实际情况调整，可能需要使用微信开放平台接口或其他抓取方法。