import os
import sys
import requests
from PyQt5.QtWidgets import (
QApplication, QMainWindow,
QFileDialog,
QLabel,
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QTextCursor
from docx import Document
from 智能问答系统_ui import Ui_aiWindow
# LangChain 模块
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.document_loaders import TextLoader, Docx2txtLoader
from langchain_community.vectorstores import FAISS
class aiWindow(QMainWindow):
    """Main window of the knowledge-base question-answering application.

    The window lets the user manage knowledge files (.txt / .docx), build a
    FAISS vector index from them in a background thread, and ask questions
    that are answered by a background QA thread using that index.
    """

    # Embedding model used whenever this window loads the FAISS index.
    EMBEDDING_MODEL = "nomic-embed-text"
    # File produced by FAISS.save_local() with its default index name; its
    # presence (not the mere existence of the folder) means an index was saved.
    INDEX_FILE = "index.faiss"
    # Record of already-indexed file names maintained by BuildVectorDBThread.
    PROCESSED_RECORD = ".processed_files.txt"

    def __init__(self):
        """Build the UI, prepare working directories and wire up all signals."""
        super().__init__()
        self.ui = Ui_aiWindow()
        self.ui.setupUi(self)
        self.resize(700, 500)

        # Working directories (created on first start).
        self.knowledge_dir = os.path.abspath("./knowledge_files")
        self.persist_directory = os.path.abspath("./faiss_index")
        os.makedirs(self.knowledge_dir, exist_ok=True)
        os.makedirs(self.persist_directory, exist_ok=True)

        # Runtime state.
        self.knowledge_db = None
        self.build_thread = None
        self.qa_thread = None
        # True while a build thread is running; prevents concurrent builds and
        # endless load -> build -> load loops.
        self._is_building = False

        # Page switching buttons.
        self.ui.stackedWidget.setCurrentIndex(0)
        self.ui.pushButton.clicked.connect(lambda: self.ui.stackedWidget.setCurrentIndex(0))
        self.ui.pushButton_2.clicked.connect(lambda: self.ui.stackedWidget.setCurrentIndex(1))

        # Knowledge file list.
        self.load_existing_files()
        self.ui.listWidget.itemClicked.connect(self.preview_file)

        # Load a previously saved vector index, if any.
        self.load_vector_db()

        # Build-index button.
        self.ui.pushButton_5.clicked.connect(self.start_build_vector_db)
        # Submit-question button.
        self.ui.pushButton_6.clicked.connect(self.ask_question)
        # Upload / delete file buttons.
        self.ui.pushButton_3.clicked.connect(self.add_file)
        self.ui.pushButton_4.clicked.connect(self.remove_file)
        self.show()

    def _index_exists(self):
        """Return True when a saved FAISS index file is present on disk."""
        return os.path.isfile(os.path.join(self.persist_directory, self.INDEX_FILE))

    def load_existing_files(self):
        """Refill the list widget with the .txt/.docx files in the knowledge folder."""
        self.ui.listWidget.clear()
        for file in os.listdir(self.knowledge_dir):
            if file.endswith((".txt", ".docx")):
                self.ui.listWidget.addItem(file)

    def add_file(self):
        """Let the user pick files and copy the new ones into the knowledge folder.

        A file whose name already exists in the knowledge folder is skipped.
        """
        file_paths, _ = QFileDialog.getOpenFileNames(self, "选择知识库文件", "", "文档文件 (*.txt *.docx)")
        for path in file_paths:
            filename = os.path.basename(path)
            dest = os.path.join(self.knowledge_dir, filename)
            if not os.path.exists(dest):
                with open(path, "rb") as src, open(dest, "wb") as dst:
                    dst.write(src.read())
                self.ui.listWidget.addItem(filename)

    def preview_file(self):
        """Show the text of the currently selected knowledge file in the preview pane."""
        item = self.ui.listWidget.currentItem()
        if not item:
            return
        filename = item.text()
        path = os.path.join(self.knowledge_dir, filename)
        try:
            if filename.endswith(".txt"):
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            elif filename.endswith(".docx"):
                doc = Document(path)
                content = "\n".join([p.text for p in doc.paragraphs])
            else:
                content = "不支持的格式"
        except Exception as e:
            # Any read/parse problem is shown in the preview instead of raising.
            content = f"读取失败: {e}"
        self.ui.textEdit_2.setPlainText(content)

    def _forget_processed_file(self, filename):
        """Drop *filename* from the processed-files record.

        Without this, a file that is deleted and later uploaded again under
        the same name would be treated as already indexed and never rebuilt.
        """
        record_path = os.path.join(self.knowledge_dir, self.PROCESSED_RECORD)
        if not os.path.isfile(record_path):
            return
        with open(record_path, "r", encoding="utf-8") as f:
            names = f.read().splitlines()
        remaining = [name for name in names if name != filename]
        if len(remaining) != len(names):
            with open(record_path, "w", encoding="utf-8") as f:
                f.write("\n".join(remaining))

    def _purge_vectors(self, filename):
        """Delete every stored chunk whose ``source`` metadata equals *filename*.

        Returns the number of deleted chunks. When anything was deleted the
        index is saved and also becomes the in-memory ``knowledge_db`` so
        later questions no longer retrieve the removed content.
        """
        embeddings = OllamaEmbeddings(model=self.EMBEDDING_MODEL)
        # NOTE(review): allow_dangerous_deserialization unpickles the docstore;
        # only acceptable because the index folder is written by this app.
        db = FAISS.load_local(self.persist_directory, embeddings, allow_dangerous_deserialization=True)
        to_delete = [
            doc_id
            for doc_id, doc in db.docstore._dict.items()
            if doc.metadata.get("source") == filename
        ]
        if to_delete:
            db.delete(to_delete)
            db.save_local(self.persist_directory)
            self.knowledge_db = db
        return len(to_delete)

    def remove_file(self):
        """Delete the selected knowledge file together with its indexed chunks."""
        selected_item = self.ui.listWidget.currentItem()
        if not selected_item:
            return
        filename = selected_item.text()
        path = os.path.join(self.knowledge_dir, filename)
        try:
            if os.path.exists(path):
                os.remove(path)
            self._forget_processed_file(filename)
            # Remove the entries of this file from the vector index. The folder
            # always exists (created in __init__), so test for the index file.
            if self._index_exists():
                removed = self._purge_vectors(filename)
                if removed:
                    show_toast(f"✅ 已从向量库中删除 {removed} 条与 {filename} 相关的记录。")
        except Exception as e:
            show_toast(f"❌ 删除向量记录或文件时出错:{e}")
        if os.path.exists(path):
            # The file could not be deleted: keep it listed so the list keeps
            # matching what is on disk.
            return
        self.ui.listWidget.takeItem(self.ui.listWidget.row(selected_item))
        self.ui.textEdit_2.clear()

    def load_vector_db(self):
        """Load the saved FAISS index into ``self.knowledge_db``.

        Does nothing when no index has been saved yet. If loading fails, one
        rebuild is started unless a build is already in progress.
        """
        try:
            print("🔍 开始加载向量数据库...")
            embeddings = OllamaEmbeddings(model=self.EMBEDDING_MODEL)
            print("✅ 已初始化 OllamaEmbeddings")
            if not self._index_exists():
                print("🟡 向量库目录不存在,跳过加载")
                return
            print("📥 正在加载向量数据库...")
            self.knowledge_db = FAISS.load_local(
                self.persist_directory, embeddings, allow_dangerous_deserialization=True
            )
            print("✅ 向量数据库加载成功")
        except Exception as e:
            print("❌ 加载向量数据库失败:", e)
            # Guard against an endless load -> build -> load cycle.
            if not self._is_building:
                self.start_build_vector_db()

    def handle_build_success(self, msg):
        """Append *msg* to the output pane and reload the index.

        The build-in-progress flag is cleared by the thread's ``finished``
        handler, not here.
        """
        self.ui.textEdit.append(msg)
        self.load_vector_db()

    def _on_build_finished(self):
        """Mark the build as done so a new one may be started."""
        self._is_building = False

    def start_build_vector_db(self):
        """Start the background index build unless one is already running."""
        if self._is_building:
            self.update_status("⏳ 向量数据库正在构建中,请稍候...")
            return
        print("🚀 开始构建向量数据库线程...")
        self._is_building = True
        self.build_thread = BuildVectorDBThread(self.knowledge_dir)
        self.build_thread.status_signal.connect(self.update_status)
        # Reload the index once the build reports success.
        self.build_thread.result_signal.connect(lambda _: self.load_vector_db())
        self.build_thread.error_signal.connect(self.show_error)
        self.build_thread.finished.connect(self._on_build_finished)
        self.build_thread.finished.connect(self.build_thread.deleteLater)
        self.build_thread.start()

    def update_status(self, msg):
        """Append *msg* plus a newline at the end of the output pane."""
        cursor = self.ui.textEdit.textCursor()
        cursor.movePosition(QTextCursor.End)
        cursor.insertText(msg + "\n")
        self.ui.textEdit.setTextCursor(cursor)

    def show_result(self, result):
        """Append *result* to the output pane and reload the index."""
        self.ui.textEdit.append(result)
        self.load_vector_db()

    def show_error(self, error):
        """Append an error message to the output pane."""
        self.ui.textEdit.append(error)

    def _on_qa_finished(self):
        """Re-enable the submit button once the QA thread has ended."""
        self.ui.pushButton_6.setEnabled(True)

    def ask_question(self):
        """Send the typed question to a background QA thread."""
        question = self.ui.lineEdit.text().strip()
        if not question:
            return
        if self.knowledge_db is None:
            # Tell the user why nothing happens instead of silently returning.
            self.ui.textEdit.append("⚠️ 向量数据库尚未加载,请先构建数据库。")
            return
        self.ui.textEdit.append("⏳ 正在获取答案,请稍候...")
        self.ui.pushButton_6.setEnabled(False)
        self.qa_thread = QAWorkerThread(question, self.knowledge_db)
        self.qa_thread.status_signal.connect(self.update_status)
        self.qa_thread.result_signal.connect(self.show_ask_result)
        self.qa_thread.error_signal.connect(self.show_error)
        # "finished" fires after success and after failure, so the button is
        # always re-enabled.
        self.qa_thread.finished.connect(self._on_qa_finished)
        self.qa_thread.finished.connect(self.qa_thread.deleteLater)
        self.qa_thread.start()

    def show_ask_result(self, result):
        """Replace the output pane content with the answer text."""
        self.ui.textEdit.setPlainText(result)
class BuildVectorDBThread(QThread):
    """Background thread that indexes new knowledge files into a FAISS store.

    Signals:
        status_signal(str): progress messages.
        result_signal(str): emitted once after the index was saved.
        error_signal(str): emitted when the build fails.
    """

    status_signal = pyqtSignal(str)
    result_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    # File produced by FAISS.save_local() with its default index name.
    INDEX_FILE = "index.faiss"

    def __init__(self, knowledge_dir, parent=None, persist_directory=None):
        """Store the folders to work on.

        Args:
            knowledge_dir: folder containing the .txt/.docx knowledge files.
            parent: optional Qt parent object.
            persist_directory: folder of the FAISS index; defaults to
                ``./faiss_index`` resolved against the current directory.
        """
        super().__init__(parent)
        self.knowledge_dir = knowledge_dir
        self.persist_directory = persist_directory or os.path.abspath("./faiss_index")

    def _load_processed_files(self, processed_files_path):
        """Return the set of file names recorded as already indexed."""
        print("📄 加载已处理文件记录:", processed_files_path)
        if os.path.exists(processed_files_path):
            with open(processed_files_path, "r", encoding="utf-8") as f:
                processed_files = set(f.read().splitlines())
            print(f"✅ 已读取 {len(processed_files)} 个已处理文件名")
            return processed_files
        print("🆕 未找到已处理文件记录,将创建新的记录")
        return set()

    def _collect_new_documents(self, processed_files):
        """Load every supported file not yet in *processed_files*.

        Returns ``(documents, newly_loaded_names)``. A file that fails to load
        is reported through ``status_signal`` and skipped.
        """
        documents = []
        loaded_names = []
        print("📂 开始遍历知识库目录中的文件...")
        for file in os.listdir(self.knowledge_dir):
            file_path = os.path.join(self.knowledge_dir, file)
            print(f"🔎 检查文件: {file}")
            if not file.endswith((".txt", ".docx")):
                print(f"🚫 跳过非支持格式文件: {file}")
                continue
            if file in processed_files:
                continue
            try:
                print(f"⚙️ 正在加载文件: {file}")
                if file.endswith(".txt"):
                    loader = TextLoader(file_path, encoding="utf-8")
                else:
                    loader = Docx2txtLoader(file_path)
                docs = loader.load()
                print(f"📄 从 {file} 中加载了 {len(docs)} 个文档")
                # Store the bare file name so entries can later be matched
                # against a file name when that file is removed.
                for doc in docs:
                    doc.metadata["source"] = file
                documents.extend(docs)
                loaded_names.append(file)
                self.status_signal.emit(f"✅ 文件 {file} 处理完成")
            except Exception as e:
                self.status_signal.emit(f"⚠️ 加载文件 {file} 出错:{e}")
                print(f"⚠️ 加载文件 {file} 出错:{e}")
        return documents, loaded_names

    def run(self):
        """Index all not-yet-processed knowledge files and save the store."""
        print("📚 正在构建向量数据库...")
        # Make sure the knowledge folder exists.
        print("🔍 检查知识库目录是否存在:", self.knowledge_dir)
        if not os.path.exists(self.knowledge_dir):
            self.error_signal.emit("❌ 知识库目录不存在")
            print("❌ 错误:知识库目录不存在")
            return

        processed_files_path = os.path.join(self.knowledge_dir, ".processed_files.txt")
        processed_files = self._load_processed_files(processed_files_path)
        documents, loaded_names = self._collect_new_documents(processed_files)
        new_processed_files = list(processed_files) + loaded_names

        if not documents:
            self.status_signal.emit("✅ 没有新文件需要处理。")
            print("✅ 没有新文件需要处理,跳过构建")
            return

        try:
            # Split the documents into chunks.
            print("✂️ 开始分割文档...")
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            docs = text_splitter.split_documents(documents)
            print(f"🧩 分割完成,共生成 {len(docs)} 个文本块")
            self.status_signal.emit(f"🧩 分割完成,共生成 {len(docs)} 个文本块")

            print("🧠 初始化 OllamaEmbeddings(模型:nomic-embed-text)")
            embeddings = OllamaEmbeddings(model="nomic-embed-text")
            print("✅ embeddings 初始化成功")

            # Test for the index file, not the folder: the folder may exist
            # while still empty, and loading an empty folder fails.
            index_path = os.path.join(self.persist_directory, self.INDEX_FILE)
            if os.path.isfile(index_path):
                print("📥 正在加载现有向量数据库...")
                db = FAISS.load_local(self.persist_directory, embeddings, allow_dangerous_deserialization=True)
                print("🔄 正在添加新文档到向量数据库...")
                db.add_documents(docs)
            else:
                # from_documents already stores the chunks; adding them again
                # would duplicate every entry.
                print("🆕 正在创建新的向量数据库...")
                db = FAISS.from_documents(docs, embeddings)

            print("💾 正在保存向量数据库...")
            db.save_local(self.persist_directory)
            self.status_signal.emit("✅ 向量数据库保存成功")
            print("✅ 向量数据库保存成功")

            # Record the processed files only after the index was saved, so a
            # failed build is retried next time.
            print("📝 正在更新已处理文件列表...")
            with open(processed_files_path, "w", encoding="utf-8") as f:
                f.write("\n".join(new_processed_files))
            print("✅ 已更新已处理文件列表")
            self.result_signal.emit("🎉 向量数据库构建完成")
            print("🎉 向量数据库构建完成")
        except Exception as e:
            self.error_signal.emit(f"❌ 构建数据库时发生错误:{e}")
            print(f"❌ 构建数据库时发生错误:{e}")
class QAWorkerThread(QThread):
    """Background thread that answers one question with Ollama and the index.

    Signals:
        status_signal(str): progress messages (declared; not emitted here).
        result_signal(str): the formatted question/answer text.
        error_signal(str): a description of whatever went wrong.
    """

    status_signal = pyqtSignal(str)
    result_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    # Seconds to wait for the quick availability probes.
    PROBE_TIMEOUT = 5
    # Seconds to wait for the model to generate the answer.
    CHAT_TIMEOUT = 300

    def __init__(self, question, knowledge_db, parent=None,
                 model="gemma3:1b", base_url="http://127.0.0.1:11434"):
        """Store the question, the vector store and the Ollama endpoint.

        Args:
            question: the user's question text.
            knowledge_db: vector store offering ``similarity_search``.
            parent: optional Qt parent object.
            model: name of the Ollama chat model to use.
            base_url: root URL of the Ollama HTTP API.
        """
        super().__init__(parent)
        self.question = question
        self.knowledge_db = knowledge_db
        self.model = model
        self.base_url = base_url.rstrip("/")

    def _check_service(self):
        """Raise if Ollama is unreachable or the chat model is not installed."""
        try:
            response = requests.get(f"{self.base_url}/api/version", timeout=self.PROBE_TIMEOUT)
        except requests.exceptions.RequestException as err:
            # A stopped service refuses the connection instead of returning a
            # status code, so map that to the same user-facing message.
            raise RuntimeError("❌ Ollama 服务未启动,请先启动服务。") from err
        if response.status_code != 200:
            raise RuntimeError("❌ Ollama 服务未启动,请先启动服务。")

        model_response = requests.get(f"{self.base_url}/api/tags", timeout=self.PROBE_TIMEOUT)
        models = model_response.json().get("models", [])
        if not models:
            raise RuntimeError("❌ 未找到可用模型,请先拉取模型。")
        available_models = [m["name"] for m in models]
        if self.model not in available_models:
            raise RuntimeError(f"❌ 模型 {self.model} 不存在,请先拉取该模型。")

    def run(self):
        """Retrieve context, query the model and emit the answer or an error."""
        try:
            self._check_service()

            # Retrieve the most similar chunks from the knowledge base.
            docs = self.knowledge_db.similarity_search(self.question, k=6)
            if not docs:
                raise RuntimeError("❌ 未找到相关知识,请尝试其他问题。")
            # Asterisks are removed from the chunks before they go into the prompt.
            context = "\n\n".join([
                f"文档{i + 1}:\n{doc.page_content.replace('**', '').replace('*', '')}"
                for i, doc in enumerate(docs)
            ])
            prompt = f"""请根据以下知识内容,用自己的话总结并回答问题。请确保回答清晰、准确,并基于提供的资料,不要编造内容。
{context}
问题:{self.question}
答案:"""
            # Ask the model; a timeout keeps the thread from hanging forever.
            response = requests.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False
                },
                timeout=self.CHAT_TIMEOUT,
            )
            # Surface HTTP errors instead of reporting an empty answer.
            response.raise_for_status()
            answer = response.json().get("message", {}).get("content", "未获取到有效回答")
            self.result_signal.emit(f"问题:{self.question}\n答案:{answer}\n{'-' * 30}")
        except Exception as e:
            self.error_signal.emit(f"❌ 处理过程中发生错误:{str(e)}")
class ToastLabel(QLabel):
    """Frameless, always-on-top label that shows a message and removes itself."""

    def __init__(self, text, duration=2000):
        """Create the toast.

        Args:
            text: message to display.
            duration: time in milliseconds before the toast hides and is
                scheduled for deletion.
        """
        super().__init__(text)
        self.setStyleSheet('''
            background-color: #333;
            color: #fff;
            padding: 10px;
            border-radius: 5px;
        ''')
        self.setAlignment(Qt.AlignCenter)
        self.setWordWrap(True)
        self.setFixedSize(250, 80)
        self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint | Qt.ToolTip)
        self.setAttribute(Qt.WA_ShowWithoutActivating)
        # Position on the next event-loop turn, then auto-dismiss later.
        QTimer.singleShot(0, self.center_on_screen)
        QTimer.singleShot(duration, lambda: (self.hide(), self.deleteLater()))

    def center_on_screen(self):
        """Move the toast to the centre of the primary screen's available area.

        This method was referenced by the constructor but missing, which made
        every toast fail with AttributeError.
        """
        screen = QApplication.primaryScreen()
        if screen is None:
            # No screen available: leave the toast where it is.
            return
        area = screen.availableGeometry()
        self.move(area.center() - self.rect().center())
def show_toast(message, duration=2000):
    """Create a ToastLabel for *message* and display it immediately.

    Args:
        message: text shown in the toast.
        duration: display time in milliseconds, passed on to ToastLabel.
    """
    popup = ToastLabel(message, duration)
    popup.show()
if __name__ == '__main__':
    from PyQt5 import QtCore
    from PyQt5 import QtGui

    # High-DPI options are set before the QApplication is created.
    QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling, True)
    QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_UseHighDpiPixmaps, True)
    # The rounding policy is not an application attribute; it has its own
    # setter and must not be passed to setAttribute().
    QtGui.QGuiApplication.setHighDpiScaleFactorRoundingPolicy(
        QtCore.Qt.HighDpiScaleFactorRoundingPolicy.PassThrough
    )
    app = QApplication(sys.argv)

    # Scale the application font with the logical DPI of the first screen,
    # never going below 12 pt.
    screen = app.screens()[0]
    dpi = screen.logicalDotsPerInch()
    base_dpi = 96.0
    scale_factor = dpi / base_dpi
    font_size = max(12, int(12 * scale_factor))
    font = app.font()
    font.setPointSize(font_size)
    app.setFont(font)

    window = aiWindow()
    sys.exit(app.exec())
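# NOTE(review): stray chat instruction pasted after the code ("add the modification from just now to my code"); kept as a comment so the file remains valid Python.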