D:\Python3.11\python.exe C:/Users/17640/Desktop/整合测试初版.py
原始文档数: 1
有效文档数: 1
正在处理文档: 无标题
文档内容前100字符: 涂装知识 - Sheet1
涂料类型 | 子类 | 主要成分(含比例) | 颜色(RAL 编号) | 关键性能指标 | 执行标准 | 应用场景 | Unnamed: 7
--------------...
Process finished with exit code -1073741819 (0xC0000005)
import sys
import os
import pandas as pd
from PyQt5.QtWidgets import (QApplication, QWidget, QVBoxLayout, QTextEdit,
QPushButton, QFileDialog, QLabel, QMessageBox, QHBoxLayout)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QFont
from ollama import Client
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain_ollama import OllamaEmbeddings # 需要安装 langchain-ollama
import chromadb
import concurrent.futures
# 其余代码保持不变...
# Excel文件处理类 - 专注于涂装知识
class PaintKnowledgeProcessor:
    """Convert a paint-knowledge Excel workbook into LangChain documents."""

    @staticmethod
    def process_excel(file_path):
        """Read every non-empty sheet of an Excel workbook into a document.

        Args:
            file_path: Path of the workbook to read.

        Returns:
            A list with one ``Document`` per non-empty sheet. When reading or
            parsing fails, no exception escapes: a single-element list is
            returned instead, holding a placeholder document whose metadata
            carries an ``"error"`` key.
        """
        try:
            documents = []
            # The context manager closes the workbook handle even when a
            # sheet fails to parse.
            with pd.ExcelFile(file_path) as excel_file:
                for sheet_name in excel_file.sheet_names:
                    df = excel_file.parse(sheet_name)
                    # Skip sheets without any data.
                    if df.empty:
                        continue
                    sheet_text = PaintKnowledgeProcessor._df_to_text(
                        df, sheet_name
                    )
                    metadata = {
                        "source": file_path,
                        "sheet": sheet_name,
                        "rows": len(df),
                        "columns": list(df.columns),
                    }
                    documents.append(Document(
                        page_content=sheet_text,
                        metadata=metadata,
                        description=f"涂装知识: {sheet_name}"
                    ))
            return documents
        except Exception as e:
            return [Document(
                page_content=f"无法处理Excel文件: {str(e)}",
                metadata={"source": file_path, "error": str(e)},
                description=f"Excel文件: {os.path.basename(file_path)} (处理错误)"
            )]

    @staticmethod
    def _df_to_text(df, sheet_name, max_rows=200):
        """Render a DataFrame as pipe-separated text with a title line.

        Args:
            df: The sheet contents.
            sheet_name: Sheet name, used in the title line.
            max_rows: Maximum number of data rows to render; when the sheet
                has more rows a truncation notice is appended instead.

        Returns:
            The rendered text: title, blank line, header row, a dashed rule,
            then one line per data row.
        """
        max_rows = max(max_rows, 0)
        columns = list(df.columns)
        title = f"涂装知识 - {sheet_name}\n\n"
        # Column labels are not always strings (e.g. numeric headers), so
        # convert them explicitly before joining.
        header = " | ".join(str(col) for col in columns)
        # The rule is as wide as the title plus the header, which keeps the
        # layout this method has always produced.
        rule = "-" * (len(title) + len(header))

        parts = [title, header, "\n", rule, "\n"]
        # Only the rows that will be rendered are converted.
        for row in df.head(max_rows).to_dict('records'):
            parts.append(" | ".join(str(row[col]) for col in columns))
            parts.append("\n")

        total_rows = len(df)
        if total_rows > max_rows:
            parts.append(f"...\n显示了前 {max_rows} 行,共 {total_rows} 行")
        return "".join(parts)
# AI处理线程 - 专注于涂装知识问答
class PaintAIWorker(QThread):
    """Background thread that answers one user question with the chat model.

    Signals:
        update_signal(str): emitted with each piece of text to append to the
            reply (retrieval notice, streamed model output, or error text).
        finish_signal(): emitted once when the thread is done, whether it
            succeeded or failed.
    """

    update_signal = pyqtSignal(str)
    finish_signal = pyqtSignal()

    def __init__(self, client, user_input, knowledge_retriever):
        """Store the chat client, the question and the optional retriever.

        Args:
            client: Object whose ``chat(model=..., messages=..., stream=True)``
                returns an iterable of chunks.
            user_input: The question text.
            knowledge_retriever: Retriever used to look up context, or a
                falsy value when no knowledge base is loaded.
        """
        super().__init__()
        self.client = client
        self.user_input = user_input
        self.knowledge_retriever = knowledge_retriever

    def _retrieve_documents(self):
        """Return the documents relevant to the question (possibly empty)."""
        retriever = self.knowledge_retriever
        if not retriever:
            return []
        # ``invoke`` is the current retriever entry point;
        # ``get_relevant_documents`` is deprecated and kept only as a
        # fallback for retrievers that do not provide ``invoke``.
        if hasattr(retriever, "invoke"):
            return retriever.invoke(self.user_input)
        return retriever.get_relevant_documents(self.user_input)

    def run(self):
        """Retrieve context, build the prompt and stream the model's answer."""
        try:
            context = ""
            docs = self._retrieve_documents()
            if docs:
                context = "\n\n".join(doc.page_content for doc in docs)
                self.update_signal.emit(f"[涂装知识库检索到 {len(docs)} 条相关内容]\n")

            # Paint-expert prompt: reference material first, then the question.
            prompt = (
                "\n"
                "你是一个专业的涂装知识专家。以下是相关的涂装参考资料:\n"
                f"{context}\n"
                f"用户问题:{self.user_input}\n"
                "请根据上述涂装知识资料提供准确回答,并确保回答清晰、专业。\n"
                "如果资料中没有相关信息,请提供涂装领域的通用知识和最佳实践。\n"
            )

            stream = self.client.chat(
                model='deepseek-r1:1.5b',
                messages=[{"role": "user", "content": prompt}],
                stream=True,
            )
            for chunk in stream:
                self.update_signal.emit(chunk['message']['content'])
        except Exception as e:
            # Errors are reported through the reply stream rather than raised.
            self.update_signal.emit(f"\n[Error] {str(e)}")
        finally:
            self.finish_signal.emit()
# Excel处理线程
class ExcelProcessorWorker(QThread):
    """Background thread that builds the Chroma knowledge base from an Excel file.

    Signals:
        progress_signal(str): progress text, and the error text on failure.
        finish_signal(object): the retriever on success, ``None`` on failure.
        error_signal(str): the formatted error message including traceback.
    """

    progress_signal = pyqtSignal(str)
    finish_signal = pyqtSignal(object)
    error_signal = pyqtSignal(str)

    # Directory where the Chroma store is persisted.
    _PERSIST_DIR = "./chroma_paint_kb"

    def __init__(self, file_path):
        """Remember the workbook path to process when the thread starts."""
        super().__init__()
        self.file_path = file_path
        self.processor = PaintKnowledgeProcessor()

    def run(self):
        """Parse the workbook, split it into chunks and index them in Chroma."""
        try:
            self._reset_store()
            documents = self._load_documents()
            split_docs = self._split_documents(documents)
            self.progress_signal.emit(f"文档拆分完成,生成 {len(split_docs)} 个文本块")
            vectordb = self._build_vector_store(split_docs)
            self.progress_signal.emit("涂装知识库初始化完成")
            self.finish_signal.emit(vectordb.as_retriever())
        except Exception as e:
            import traceback
            error_msg = f"[Error] 处理Excel文件时出错: {str(e)}\n{traceback.format_exc()}"
            self.progress_signal.emit(error_msg)
            self.error_signal.emit(error_msg)
            self.finish_signal.emit(None)

    def _reset_store(self):
        """Delete the previously persisted vector store, if there is one."""
        if os.path.exists(self._PERSIST_DIR):
            import shutil
            shutil.rmtree(self._PERSIST_DIR)

    def _load_documents(self):
        """Parse the workbook and return the documents that contain text.

        Raises:
            ValueError: if nothing was parsed, parsing failed, or every
                document is blank.
        """
        documents = PaintKnowledgeProcessor.process_excel(self.file_path)
        print(f"原始文档数: {len(documents)}")
        if not documents:
            raise ValueError("未解析到有效文档内容")

        # process_excel reports a failure as a placeholder document with an
        # "error" metadata key; that text must not be indexed as knowledge.
        for doc in documents:
            error = doc.metadata.get("error")
            if error:
                raise ValueError(f"无法处理Excel文件: {error}")

        valid_documents = [doc for doc in documents if doc.page_content.strip()]
        print(f"有效文档数: {len(valid_documents)}")
        if not valid_documents:
            raise ValueError("有效文档为空,可能过滤条件过严")
        return valid_documents

    def _split_documents(self, documents):
        """Split each document into chunks of at most 500 characters.

        Raises:
            ValueError: if splitting produced no chunks at all.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            # The separators are literal text. Treated as regular
            # expressions, "?" is not a valid pattern and "." matches any
            # character.
            is_separator_regex=False,
            # "" (split between characters) must come last; any separator
            # listed after it is never tried.
            separators=["\n\n", "\n", "。", "?", "!", "?", "!", ".", " ", ""],
        )

        split_docs = []
        # Splitting is CPU-bound pure-Python work, so a plain loop is used.
        for doc in documents:
            # Normalise line endings; CRLF becomes one newline, not two.
            cleaned_text = (
                doc.page_content.replace("\r\n", "\n").replace("\r", "\n").strip()
            )
            if not cleaned_text:
                continue
            title = doc.metadata.get('name') or doc.metadata.get('sheet', '无标题')
            print(f"正在处理文档: {title}")
            print(f"文档内容前100字符: {cleaned_text[:100]}...")
            # Carry only scalar metadata onto the chunks; list values such as
            # the column list are left out.
            # NOTE(review): presumably Chroma rejects non-scalar metadata
            # values — confirm against the installed version.
            metadata = {
                key: value
                for key, value in doc.metadata.items()
                if isinstance(value, (str, int, float, bool))
            }
            split_docs.extend(
                text_splitter.create_documents([cleaned_text], metadatas=[metadata])
            )

        if not split_docs:
            raise ValueError("拆分后文档数为0,可能内容过短或分隔符不匹配")
        return split_docs

    def _build_vector_store(self, split_docs):
        """Embed the chunks batch by batch and return the Chroma store."""
        self.progress_signal.emit("正在加载 Ollama 嵌入模型...")
        embeddings = OllamaEmbeddings(
            model="orca-mini:3b",
            base_url="http://localhost:11434"
        )

        batch_size = 1  # number of chunks embedded per progress update
        vectordb = None
        for start in range(0, len(split_docs), batch_size):
            batch_docs = split_docs[start:start + batch_size]
            self.progress_signal.emit(
                f"正在生成第 {start // batch_size + 1} 批知识嵌入向量..."
            )
            if vectordb is None:
                # The first batch creates the store.
                vectordb = Chroma.from_documents(
                    documents=batch_docs,
                    embedding=embeddings,
                    persist_directory=self._PERSIST_DIR,
                    client_settings=chromadb.config.Settings(
                        anonymized_telemetry=False,
                        allow_reset=True,
                        is_persistent=True
                    )
                )
            else:
                # Later batches are appended to it.
                vectordb.add_documents(batch_docs)

        # Call persist() only when the installed Chroma wrapper provides it.
        persist = getattr(vectordb, "persist", None)
        if callable(persist):
            persist()
        return vectordb
# 主聊天窗口类
class PaintChatWindow(QWidget):
    """Main window: loads an Excel knowledge base and chats with the model."""

    # Placeholder shown as the bot reply until the first streamed text arrives.
    _THINKING_TEXT = "思考中..."

    _STATUS_IDLE_STYLE = "color: #888888; padding: 5px;"
    _STATUS_READY_STYLE = "color: #4CAF50; padding: 5px; font-weight: bold;"

    _CHAT_STYLE = (
        "\n"
        "background-color: #f0f0f0;\n"
        "font-size: 14px;\n"
        "padding: 10px;\n"
        "font-family: SimHei, sans-serif;\n"
    )
    _LOAD_BUTTON_STYLE = (
        "\n"
        "QPushButton {\n"
        "background-color: #2196F3;\n"
        "color: white;\n"
        "border: none;\n"
        "padding: 5px 10px;\n"
        "font-size: 12px;\n"
        "}\n"
        "QPushButton:hover { background-color: #0b7dda; }\n"
    )
    _PROGRESS_STYLE = (
        "\n"
        "background-color: #f9f9f9;\n"
        "font-size: 12px;\n"
        "padding: 5px;\n"
        "color: #555555;\n"
    )
    _INPUT_STYLE = (
        "\n"
        "background-color: white;\n"
        "font-size: 14px;\n"
        "padding: 10px;\n"
        "font-family: SimHei, sans-serif;\n"
    )
    _SEND_BUTTON_STYLE = (
        "\n"
        "QPushButton {\n"
        "background-color: #4CAF50;\n"
        "color: white;\n"
        "border: none;\n"
        "padding: 10px;\n"
        "font-size: 14px;\n"
        "min-width: 80px;\n"
        "}\n"
        "QPushButton:hover { background-color: #45a049; }\n"
    )

    def __init__(self):
        """Build the UI and create the Ollama chat client."""
        super().__init__()
        # Workers are kept on the instance so a QThread object is never
        # discarded while its thread is still running.
        self.excel_worker = None
        self.ai_thread = None
        self.knowledge_retriever = None
        self.init_ui()
        # The ollama Client takes ``host`` (not ``base_url``). The port
        # matches the one the embedding model is reached on (11434).
        self.client = Client(host="http://localhost:11434")

    def init_ui(self):
        """Create and lay out all widgets."""
        self.setWindowTitle("涂装知识智能助手")
        self.setGeometry(100, 100, 800, 600)
        main_layout = QVBoxLayout()

        # Status line at the top.
        self.status_label = QLabel("涂装知识库未加载")
        self.status_label.setFont(QFont("SimHei", 10))
        self.status_label.setStyleSheet(self._STATUS_IDLE_STYLE)
        main_layout.addWidget(self.status_label)

        # Conversation view.
        self.chat_display = QTextEdit()
        self.chat_display.setReadOnly(True)
        self.chat_display.setStyleSheet(self._CHAT_STYLE)
        main_layout.addWidget(self.chat_display)

        # Excel file selection row.
        excel_layout = QHBoxLayout()
        self.excel_path_label = QLabel("未选择Excel文件")
        self.excel_path_label.setStyleSheet("color: #666666; font-size: 12px;")
        excel_layout.addWidget(self.excel_path_label)
        self.load_excel_btn = QPushButton("加载涂装知识库")
        self.load_excel_btn.setStyleSheet(self._LOAD_BUTTON_STYLE)
        self.load_excel_btn.clicked.connect(self.load_excel_file)
        excel_layout.addWidget(self.load_excel_btn)
        main_layout.addLayout(excel_layout)

        # Knowledge-base build progress.
        self.progress_display = QTextEdit()
        self.progress_display.setReadOnly(True)
        self.progress_display.setMaximumHeight(60)
        self.progress_display.setStyleSheet(self._PROGRESS_STYLE)
        main_layout.addWidget(self.progress_display)

        # Question input and send button.
        input_layout = QHBoxLayout()
        self.input_area = QTextEdit()
        self.input_area.setMaximumHeight(100)
        self.input_area.setPlaceholderText("输入您的涂装相关问题...")
        self.input_area.setStyleSheet(self._INPUT_STYLE)
        input_layout.addWidget(self.input_area)
        self.send_btn = QPushButton("提问")
        self.send_btn.setStyleSheet(self._SEND_BUTTON_STYLE)
        self.send_btn.clicked.connect(self.send_message)
        input_layout.addWidget(self.send_btn)
        main_layout.addLayout(input_layout)

        self.setLayout(main_layout)

    def load_excel_file(self):
        """Ask for an Excel file and start building the knowledge base."""
        # Never replace a worker whose thread is still running.
        if self.excel_worker is not None and self.excel_worker.isRunning():
            return
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择涂装知识Excel文件", "", "Excel Files (*.xlsx *.xls)"
        )
        if not file_path:
            return

        self.excel_path_label.setText(f"涂装知识库: {os.path.basename(file_path)}")
        self.progress_display.clear()
        # The worker deletes the persisted store before rebuilding it, so a
        # retriever from an earlier load is no longer usable.
        self.knowledge_retriever = None
        self.status_label.setText("涂装知识库未加载")
        self.status_label.setStyleSheet(self._STATUS_IDLE_STYLE)
        self.load_excel_btn.setEnabled(False)

        self.excel_worker = ExcelProcessorWorker(file_path)
        self.excel_worker.progress_signal.connect(self.update_progress)
        self.excel_worker.finish_signal.connect(self.on_excel_loaded)
        self.excel_worker.start()

    def update_progress(self, message):
        """Append one progress message to the progress view."""
        self.progress_display.append(message)

    def on_excel_loaded(self, retriever):
        """Store the retriever on success, or report the failure.

        Args:
            retriever: The retriever built by the worker, or ``None`` when
                building the knowledge base failed.
        """
        self.load_excel_btn.setEnabled(True)
        if retriever:
            self.knowledge_retriever = retriever
            self.status_label.setText("涂装知识库已加载,可以开始提问")
            self.status_label.setStyleSheet(self._STATUS_READY_STYLE)
        else:
            QMessageBox.critical(self, "错误", "涂装知识库加载失败")

    def send_message(self):
        """Show the user's question and start a worker to answer it."""
        # One reply at a time: the running worker must not be replaced.
        if self.ai_thread is not None and self.ai_thread.isRunning():
            return
        user_input = self.input_area.toPlainText().strip()
        if not user_input:
            return

        self._append_message("You", user_input)
        self.input_area.clear()
        self._append_message("Bot", self._THINKING_TEXT, is_streaming=True)
        self.send_btn.setEnabled(False)

        self.ai_thread = PaintAIWorker(
            self.client,
            user_input,
            self.knowledge_retriever
        )
        self.ai_thread.update_signal.connect(self.update_bot_response)
        self.ai_thread.finish_signal.connect(self.finalize_response)
        self.ai_thread.start()

    def _append_message(self, sender, message, is_streaming=False):
        """Append a prefixed, coloured message at the end of the chat view.

        Args:
            sender: ``"You"`` for the user; anything else is shown as the bot.
            message: Plain text to display.
            is_streaming: For bot messages, use the grey placeholder colour.
        """
        import html

        cursor = self.chat_display.textCursor()
        cursor.movePosition(cursor.End)
        if sender == "You":
            prefix = "\nYou: "
            color = "#333333"
        else:
            prefix = "\nBot: "
            color = "#2196F3" if not is_streaming else "#888888"
        cursor.insertText(prefix)
        # The message is plain text: escape it so characters such as "<" are
        # displayed rather than interpreted as markup.
        safe_message = html.escape(message).replace("\n", "<br>")
        cursor.insertHtml(f'<span style="color:{color}">{safe_message}</span>')
        # textCursor() returns a copy; hand it back so the view scrolls.
        self.chat_display.setTextCursor(cursor)
        self.chat_display.ensureCursorVisible()

    def update_bot_response(self, content):
        """Append streamed text, replacing the placeholder on first use."""
        from PyQt5.QtGui import QColor, QTextCharFormat

        cursor = self.chat_display.textCursor()
        cursor.movePosition(cursor.End)
        if self.chat_display.toPlainText().endswith(self._THINKING_TEXT):
            # Remove exactly the placeholder and leave the rest of the
            # conversation, including its formatting, untouched.
            cursor.movePosition(
                cursor.Left, cursor.KeepAnchor, len(self._THINKING_TEXT)
            )
            cursor.removeSelectedText()
        reply_format = QTextCharFormat()
        reply_format.setForeground(QColor("#2196F3"))
        cursor.insertText(content, reply_format)
        self.chat_display.setTextCursor(cursor)
        self.chat_display.ensureCursorVisible()

    def finalize_response(self):
        """Close the current reply and allow the next question."""
        current = self.chat_display.toPlainText()
        if not current.endswith("\n"):
            self.chat_display.append("")
        self.send_btn.setEnabled(True)
# Script entry point: create the application, show the chat window and run
# the Qt event loop until the window is closed.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    # Use a font with CJK glyphs so Chinese text renders correctly.
    font = QFont("SimHei")
    app.setFont(font)
    window = PaintChatWindow()
    window.show()
    sys.exit(app.exec_())
# Question posted together with this code: why does the program always exit
# unexpectedly after a knowledge base is added?
最新发布