Error caused by app_offline.htm

This article describes an "application offline" error encountered while debugging, caused by an app_offline.htm file sitting in the application root, and how to fix it: exclude the file from the project or delete it.


Today, while debugging an application, the following error appeared:

This application is currently offline. To enable the application, remove the app_offline.htm file from the application root directory.
Solution: exclude app_offline.htm from the project, or simply delete the file from the application root. The error then goes away.
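For illustration, here is a minimal Python sketch of the fix, assuming a hypothetical application root of C:\inetpub\wwwroot\MyApp (adjust the path for your deployment). It checks whether a leftover app_offline.htm exists and deletes it, since ASP.NET keeps serving the offline page as long as that file is present:

```python
import os

# Hypothetical application root -- replace with the actual path to your site.
APP_ROOT = r"C:\inetpub\wwwroot\MyApp"
offline_file = os.path.join(APP_ROOT, "app_offline.htm")

# ASP.NET serves the "offline" page as long as app_offline.htm exists in the root,
# so removing the file is enough to bring the application back online.
if os.path.exists(offline_file):
    os.remove(offline_file)
    print(f"Removed {offline_file}; the application should be back online.")
else:
    print("No app_offline.htm found in the application root.")
```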