Traceback (most recent call last):
File "E:\llm_rag\debug_code.py", line 233, in <module>
vector_service = VectorService()
File "E:\llm_rag\debug_code.py", line 42, in __init__
self.init_task = asyncio.create_task(self.async_init()) # 创建异步初始化任务
File "C:\Users\raywe\AppData\Local\Programs\Python\Python310\lib\asyncio\tasks.py", line 336, in create_task
loop = events.get_running_loop()
RuntimeError: no running event loop
sys:1: RuntimeWarning: coroutine 'VectorService.async_init' was never awaited

How do I fix this? The code in question is mcp_server.py, shown below.
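The cause: asyncio.create_task() can only be called while an event loop is already running, but vector_service = VectorService() executes at import time, before mcp.run(transport="sse") has started the loop. The task is therefore never scheduled, and async_init() becomes a coroutine that is never awaited, which is exactly what the RuntimeWarning reports. The fix is to stop creating the task in __init__ and initialize lazily on the first tool call instead, when FastMCP's loop is guaranteed to be running. A minimal sketch of that pattern (the names mirror the corrected file below; the pattern itself is generic):

import asyncio

class LazyInitService:
    def __init__(self):
        # Import time: no running loop yet, so only set plain flags here
        self.initialized = False
        self._init_lock = asyncio.Lock()

    async def _ensure_initialized(self):
        # Fast path, then double-checked locking, so concurrent callers
        # run the one-time setup exactly once
        if self.initialized:
            return
        async with self._init_lock:
            if not self.initialized:
                await self.async_init()

    async def async_init(self):
        ...  # the expensive setup runs here, inside the running loop
        self.initialized = True

The corrected file in full:

# mcp_server.py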
import os
import hashlib
import logging
import asyncio
import faiss
from datetime import datetime
from mcp.server.fastmcp import FastMCP
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from ollama_embeding import CustomEmbeding
from langchain_community.document_loaders import (
TextLoader, PyPDFLoader, Docx2txtLoader, UnstructuredPowerPointLoader,
UnstructuredExcelLoader, CSVLoader, UnstructuredHTMLLoader,
UnstructuredMarkdownLoader, UnstructuredEmailLoader, UnstructuredFileLoader
)
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Create the FastMCP instance
mcp = FastMCP("VectorService")
class VectorService:
    def __init__(self):
        self.embedding_function = CustomEmbeding('shaw/dmeta-embedding-zh')
        self.docstore = InMemoryDocstore()
        self.index = faiss.IndexFlatL2(768)
        self.vector_store = None
        self.existing_index_path = "E:/llm_rag/faiss_index/index.faiss"
        self.existing_index_pkl_path = "E:/llm_rag/faiss_index/index.pkl"
        self.is_processing = False
        self.last_processed_count = 0
        # Fix: no asyncio.create_task() here -- __init__ runs at import time,
        # before mcp.run() starts the event loop. Initialization is deferred
        # to _ensure_initialized(), awaited by every async entry point.
        self.initialized = False
        self._init_lock = asyncio.Lock()

    async def _ensure_initialized(self):
        """Run async_init() exactly once, lazily, inside the running loop."""
        if self.initialized:
            return
        async with self._init_lock:
            if not self.initialized:
                await self.async_init()
    async def async_init(self):
        """Asynchronously initialize the vector store."""
        try:
            if os.path.exists(self.existing_index_path) and os.path.exists(self.existing_index_pkl_path):
                # Fix: load from an absolute path
                self.vector_store = FAISS.load_local(
                    "E:/llm_rag/faiss_index",
                    embeddings=self.embedding_function,
                    allow_dangerous_deserialization=True
                )
                logger.info("Loaded existing vector store.")
            else:
                # Fix: properly initialize a new, empty vector store
                self.vector_store = FAISS(
                    embedding_function=self.embedding_function,
                    index=self.index,
                    docstore=self.docstore,
                    index_to_docstore_id={}
                )
                logger.info("Initialized new vector store.")
            self.initialized = True  # Mark initialization as complete
        except Exception as e:
            logger.error(f"Vector store initialization failed: {str(e)}")
            # initialized stays False, so the next entry point retries;
            # more elaborate retry logic can be added here if needed
def get_id(self, file_path):
"""生成文件唯一ID"""
return hashlib.md5(file_path.encode()).hexdigest()
    def load_document(self, file_path: str):
        """Safely load a document based on its file extension."""
        # Fix: validate the path before loading
        if not os.path.exists(file_path) or not os.path.isfile(file_path):
            logger.error(f"Invalid file path: {file_path}")
            return None
        # os.path.splitext handles dotless names and dotted directories safely
        file_ext = os.path.splitext(file_path)[1].lstrip('.').lower()
logger.info(f"Loading document from {file_path}")
loader_map = {
'txt': TextLoader,
'pdf': PyPDFLoader,
'docx': Docx2txtLoader,
'pptx': UnstructuredPowerPointLoader,
'xlsx': UnstructuredExcelLoader,
'csv': CSVLoader,
'html': UnstructuredHTMLLoader,
'htm': UnstructuredHTMLLoader,
'md': UnstructuredMarkdownLoader,
'eml': UnstructuredEmailLoader,
'msg': UnstructuredEmailLoader
}
loader_class = loader_map.get(file_ext, UnstructuredFileLoader)
try:
loader = loader_class(file_path)
return loader.load()
except Exception as e:
logger.error(f"Error loading {file_path}: {str(e)}")
return None
    def _add_vector_metadata(self, file_name, file_path):
        """Attach file metadata and enrich the document content."""
        # Fix: stricter path validation
        if not os.path.exists(file_path):
            logger.error(f"File not found: {file_path}")
            return [], []
try:
docs, metadatas = [], []
file_stats = os.stat(file_path)
file_size = file_stats.st_size
documents = self.load_document(file_path)
if not documents:
return [], []
file_id = self.get_id(file_path)
for doc in documents:
metadata = doc.metadata.copy()
metadata.update({
"source": file_name,
"file_path": os.path.abspath(file_path), # 修复:使用绝对路径
"id": file_id,
"upload_time": datetime.now().isoformat()
})
                # Enrich content: prepend the file name to boost its retrieval
                # weight (the literals stay Chinese to match the zh embedder)
                enhanced_content = f"文件名: {file_name}\n内容: {doc.page_content.strip()}"
docs.append(enhanced_content)
metadatas.append(metadata)
logger.info(f"Processed {file_name} ({file_size / (1024 * 1024.0):.2f} MB)")
return docs, metadatas
except Exception as e:
logger.error(f"Error processing {file_path}: {str(e)}")
return [], []
    async def process_documents(self, data_path: str):
        """Process documents in bulk and vectorize them."""
        # Fix: verify the data path exists and is a directory
        if not os.path.exists(data_path) or not os.path.isdir(data_path):
            return {"status": "error", "message": f"Invalid data path: {data_path}"}
try:
self.is_processing = True
all_docs, all_metadatas = [], []
for root, _, files in os.walk(data_path):
for file_name in files:
file_path = os.path.join(root, file_name)
docs, metas = self._add_vector_metadata(file_name, file_path)
all_docs.extend(docs)
all_metadatas.extend(metas)
        # Persist the vector data
await self._save_data_vector(all_docs, all_metadatas)
self.last_processed_count = len(all_docs)
self.is_processing = False
return {
"status": "success",
"message": "Documents processed successfully",
"document_count": len(all_docs)
}
except Exception as e:
logger.error(f"Error processing documents: {str(e)}")
self.is_processing = False
return {"status": "error", "message": str(e)}
    async def _save_data_vector(self, docs, metadatas):
        """Asynchronously persist vector data to FAISS."""
        await self._ensure_initialized()  # Lazy init on first use
try:
            # Fix: filter out empty documents while keeping each metadata
            # entry aligned with its text (filtering docs alone would
            # desynchronize texts and metadatas in add_texts)
            pairs = [(d, m) for d, m in zip(docs, metadatas) if d]
            if not pairs:
                logger.warning("No valid documents to add")
                return
            valid_docs, valid_metadatas = map(list, zip(*pairs))
            logger.info("Starting embedding process...")
            self.vector_store.add_texts(texts=valid_docs, metadatas=valid_metadatas)
logger.info("Embedding process completed.")
logger.info("Saving updated vector store...")
            # Fix: make sure the save directory exists
os.makedirs(os.path.dirname(self.existing_index_path), exist_ok=True)
self.vector_store.save_local("E:/llm_rag/faiss_index")
logger.info("Vector store saved successfully.")
except Exception as e:
logger.error(f"Error during vector save: {str(e)}")
    async def check_process_status(self):
        """Check the processing status."""
        await self._ensure_initialized()  # Lazy init on first use
if self.is_processing:
return {"status": "processing", "message": "Documents are being processed"}
elif os.path.exists(self.existing_index_path) and os.path.exists(self.existing_index_pkl_path):
return {
"status": "success",
"message": "Vector store is updated",
"last_processed_count": self.last_processed_count
}
else:
return {"status": "empty", "message": "No vector store exists"}
    async def add_vector(self, file_path: str, file_name: str):
        """Add vectors for a single file."""
        # Fix: validate the file path
if not os.path.exists(file_path):
return {"status": "error", "message": f"File not found: {file_path}"}
try:
self.is_processing = True
docs, metas = self._add_vector_metadata(file_name, file_path)
await self._save_data_vector(docs, metas)
self.last_processed_count = len(docs)
self.is_processing = False
return {"status": "success", "message": "Vector added successfully"}
except Exception as e:
logger.error(f"Error adding vector: {str(e)}")
self.is_processing = False
return {"status": "error", "message": str(e)}
# Safe to construct at import time now: __init__ no longer touches the event loop
vector_service = VectorService()
@mcp.tool()
async def process_documents(data_path: str):
"""处理指定路径下的文档并生成向量存储"""
logger.info(f"Processing documents in {data_path}")
return await vector_service.process_documents(data_path)
@mcp.tool()
async def check_process_status():
"""检查处理状态"""
logger.info("Checking process status")
return await vector_service.check_process_status()
@mcp.tool()
async def add_vector(file_path: str, file_name: str):
"""添加单个文件向量"""
logger.info(f"Adding vector for: {file_path}")
return await vector_service.add_vector(file_path, file_name)
@mcp.tool(name="searchfile", description="Search files by keyword and return the matching content")
async def search_answer(query: str):
    """Search files by keyword and return the matching content."""
    await vector_service._ensure_initialized()  # Lazy init on first use
try:
        # Fix: use the already-initialized vector store instead of reloading it
if not vector_service.vector_store:
logger.error("Vector store not initialized")
return {"status": "error", "message": "Vector store not initialized"}
retriever = vector_service.vector_store.as_retriever(search_kwargs={"k": 10})
        # Run the search
docs = retriever.get_relevant_documents(query)
logger.info(f"Found {len(docs)} relevant documents")
        # Assemble the results
        results = []
        allowed_root = os.path.abspath("E:\\llm_rag\\data")
        for doc in docs:
            metadata = doc.metadata
            file_path = metadata.get("file_path", "")
            # Fix: stricter path validation
            if not file_path or not os.path.exists(file_path):
                continue
            # Normalize to an absolute path
            abs_path = os.path.abspath(file_path)
            # Fix: require the file to live under the allowed root; the
            # trailing separator rejects siblings such as E:\llm_rag\database
            # that a bare startswith() on "E:\llm_rag\data" would accept
            if abs_path.startswith(allowed_root + os.sep):
                # Use the path relative to the allowed root as the download URL
                download_url = os.path.relpath(abs_path, allowed_root)
results.append({
"content": doc.page_content,
"download_url": download_url,
"file_name": os.path.basename(file_path)
})
return results
except Exception as e:
logger.error(f"Search error: {str(e)}")
return {"status": "error", "message": str(e)}
if __name__ == "__main__":
mcp.settings.port = 8880
logger.info("Starting MCP server with SSE transport")
mcp.run(transport="sse")
Please fix it accordingly.
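Once applied, a quick sanity check (a sketch, assuming the corrected file is saved as mcp_server.py and is importable) confirms that the import no longer raises and that the first awaited call performs the initialization:

import asyncio
from mcp_server import vector_service  # import no longer needs a running loop

async def main():
    # The first awaited call triggers async_init() lazily inside the running loop
    status = await vector_service.check_process_status()
    print(status)

asyncio.run(main())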