postfix队列堵塞,出现很多异常字段邮件

本文介绍了一种解决Postfix邮件服务器队列异常拥塞的方法,通过修改mysql-virtual-alias-maps.cf文件中的SQL配置,解决了队列中出现大量异常邮件的问题。

公司邮箱一直拥塞,清除队列后隔几天又有大量邮件处于队列中,mailq查看发现很多异常邮件,并且收取邮件后队列不减少。

100255_o40v_3090124.png

原因是Postfix的mysql-virtual-alias-maps.cf文件中的SQL查询配置有误:查询使用了select *,会返回表中的所有列,而Postfix会把查询结果中的所有列值用逗号拼接后作为查找结果,于是产生了大量无效的收件地址。原配置为:

 select * from alias where address='%s'

修改为:

 select address from alias where address='%s'

之后就解决了队列异常拥塞问题。

 

感谢 http://blog.csdn.net/zstack_org/article/details/71514332

翻译原文:https://www.digitalocean.com/community/tutorials/how-to-configure-a-mail-server-using-postfix-dovecot-mysql-and-spamassassin

 

转载于:https://my.oschina.net/u/3090124/blog/1498477

# Note from the original poster: "I don't know which file you want me to
# change, so I have pasted all of the files."
#
# The three files below were pasted back to back; each one is introduced by a
# header comment carrying its original path.

# ===========================================================================
# 1. E:\AI_System\main.py
# ===========================================================================
import os
import sys
import time
import logging
import logging.handlers
import signal
import json
import traceback
import atexit
import threading
import queue
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Import fix: make sure the project root is on the module search path.
sys.path.insert(0, str(Path(__file__).parent.resolve()))

try:
    from core.config_system import CoreConfig
    from core.command_listener import start_command_listener
    from agent.model_manager import ModelManager
    from core.cognitive_orchestrator import CognitiveOrchestrator
    from agent.environment_interface import EnvironmentInterface
    from agent.diagnostic_system import DiagnosticSystem
    from utils.path_utils import normalize_path, clean_path_cache
except ImportError as e:
    print(f"❌ 关键模块导入失败: {e}")
    traceback.print_exc()
    sys.exit(1)

# ====================== Base path configuration ======================
# Everything is meant to live on drive E.
BASE_DRIVE = "E:"
# NOTE: Path("E:") / "x" yields the drive-relative path "E:x" on Windows
# (relative to the current directory of drive E), so paths are built from the
# drive root instead.
DRIVE_ROOT = Path(BASE_DRIVE + "/")
PROJECT_ROOT = Path(__file__).parent.resolve()
WORKSPACE_PATH = DRIVE_ROOT / "AI_Workspace"

# Create the required directory.
WORKSPACE_PATH.mkdir(parents=True, exist_ok=True)

# Module-level singletons, populated by main().
orchestrator = None
model_manager = None
environment_interface = None
diagnostic_system = None
command_listener = None
shutdown_initiated = False
progress_bar = None
log_listener = None  # global reference to the logging QueueListener


# ====================== Thread-safe log handler ======================
class ConcurrentFileHandler(logging.FileHandler):
    """File log handler that rotates by size or age under its own lock."""

    def __init__(self, filename, mode='a', encoding=None, delay=False):
        super().__init__(filename, mode, encoding, delay)
        self.lock = threading.RLock()
        self.last_rotate = time.time()
        self.max_bytes = 10 * 1024 * 1024  # 10 MB
        self.backup_count = 5

    def emit(self, record):
        """Write *record*, rotating the file first when required."""
        with self.lock:
            if self.should_rotate():
                self.do_rotate()
            super().emit(record)

    def should_rotate(self):
        """Return True when the file exceeds max_bytes or is older than 24 h."""
        if self.stream is None:
            self.stream = self._open()

        # Size check.
        if os.path.exists(self.baseFilename):
            if os.stat(self.baseFilename).st_size > self.max_bytes:
                return True

        # Age check (rotate once a day).
        if time.time() - self.last_rotate > 86400:  # 24 hours
            return True

        return False

    def do_rotate(self):
        """Shift existing backups up by one and start a fresh log file."""
        if self.stream:
            self.stream.close()
            self.stream = None

        # Rename existing backups: base.N.ext -> base.N+1.ext
        base, ext = os.path.splitext(self.baseFilename)
        for i in range(self.backup_count - 1, 0, -1):
            sfn = f"{base}.{i}{ext}"
            dfn = f"{base}.{i + 1}{ext}"
            if os.path.exists(sfn):
                if os.path.exists(dfn):
                    os.remove(dfn)
                os.rename(sfn, dfn)

        dfn = f"{base}.1{ext}"
        if os.path.exists(dfn):
            os.remove(dfn)
        # Guard against the live file having been removed externally.
        if os.path.exists(self.baseFilename):
            os.rename(self.baseFilename, dfn)

        self.last_rotate = time.time()
        self.stream = self._open()


# ====================== Logging configuration ======================
def setup_logging() -> "tuple[logging.Logger, logging.handlers.QueueListener]":
    """Configure queue-based logging to a file and the console.

    Returns:
        The "Main" child logger and the started QueueListener. The caller is
        responsible for stopping the listener on shutdown.
    """
    logs_dir = WORKSPACE_PATH / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    # Resolve the configured log level, falling back to INFO.
    log_level_name = CoreConfig.get("log_level", "INFO").upper()
    log_level = getattr(logging, log_level_name, logging.INFO)

    # One log file per process start.
    log_file = logs_dir / f"system_{time.strftime('%Y%m%d_%H%M%S')}.log"

    # Configure the root logger.
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)

    # Remove every handler that is already attached.
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)
        handler.close()

    # File handler.
    file_handler = ConcurrentFileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(log_level)

    # Console handler.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)

    # Shared format.
    log_format = "%(asctime)s - [%(levelname)s] - %(name)s - %(message)s"
    formatter = logging.Formatter(log_format)
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    # Loggers write to an unbounded queue; the listener drains it into the
    # real handlers.
    log_queue = queue.Queue(-1)
    queue_handler = logging.handlers.QueueHandler(log_queue)
    queue_handler.setLevel(log_level)

    queue_listener = logging.handlers.QueueListener(
        log_queue,
        file_handler,
        console_handler
    )
    queue_listener.start()

    root_logger.addHandler(queue_handler)

    main_logger = root_logger.getChild("Main")
    main_logger.info(f"⚙️ 日志系统初始化完成 (级别: {log_level_name})")
    main_logger.info(f"📝 日志文件位置: {log_file}")

    return main_logger, queue_listener


# ====================== Model path handling ======================
def get_model_paths(model_name: str) -> list:
    """Return every candidate location for *model_name* (drive E only)."""
    return [
        WORKSPACE_PATH / "AI_Models" / model_name,
        DRIVE_ROOT / "AI_Models" / model_name,
        DRIVE_ROOT / "Models" / model_name,
        WORKSPACE_PATH / "models" / model_name
    ]


def find_valid_model_path(model_name: str, logger: logging.Logger) -> Path:
    """Return the first existing candidate path for *model_name*.

    Raises:
        FileNotFoundError: none of the candidate paths exists.
    """
    possible_paths = get_model_paths(model_name)

    for path in possible_paths:
        normalized_path = normalize_path(str(path))
        if os.path.exists(normalized_path):
            logger.info(f"✅ 找到模型路径: {normalized_path}")
            return Path(normalized_path)

    # No valid path found: list what was checked, then fail.
    logger.critical(f"🛑 在E盘找不到模型: {model_name}")
    logger.critical("检查位置:")
    for path in possible_paths:
        logger.critical(f" - {path}")

    raise FileNotFoundError(f"在E盘找不到模型: {model_name}")


# ====================== Model registration ======================
def register_models(model_manager, logger: logging.Logger) -> list:
    """Register the configured models concurrently, showing a progress bar.

    Models whose path does not exist are skipped with a warning.

    Returns:
        Names of the models whose registration call returned a truthy value.
    """
    model_configs = {
        # The non-existent lightweight model has been removed.
        "TEXT_BASE": {"path": r"E:\AI_Models\Qwen2-7B", "type": "text", "priority": 1},
        "TEXT_CHAT": {"path": r"E:\AI_Models\deepseek-7b-chat", "type": "text", "priority": 2},
        # Problematic models are disabled for now:
        # "MULTIMODAL": {"path": r"E:\AI_Models\deepseek-vl2", "type": "multimodal", "priority": 3},
        # "IMAGE_MODEL": {"path": r"E:\AI_Models\sdxl", "type": "image", "priority": 4},
        # "YI_VL": {"path": r"E:\AI_Models\yi-vl", "type": "multimodal", "priority": 5}
    }

    # Submit in priority order.
    sorted_models = sorted(model_configs.items(), key=lambda x: x[1]["priority"])

    registered_models = []
    futures = {}

    with tqdm(total=len(model_configs), desc="🚀 注册模型") as pbar:
        with ThreadPoolExecutor(max_workers=3) as executor:
            for model_name, config in sorted_models:
                # Skip models whose path is missing.
                if not Path(config["path"]).exists():
                    logger.warning(f"⚠️ 模型路径不存在: {model_name} -> {config['path']}")
                    pbar.update(1)
                    continue

                future = executor.submit(
                    model_manager.register_model,
                    model_name,
                    config["path"],
                    config["type"]
                )
                futures[future] = model_name

            # Advance the bar exactly once per submitted model.
            for future in as_completed(futures):
                model_name = futures[future]
                try:
                    success = future.result()
                    if success:
                        registered_models.append(model_name)
                        logger.info(f"✅ 注册成功: {model_name}")
                    else:
                        logger.error(f"❌ 注册失败: {model_name}")
                except Exception as e:
                    logger.error(f"❌ 模型注册异常: {model_name} - {str(e)}", exc_info=True)
                finally:
                    pbar.update(1)
                    pbar.set_postfix_str(f"当前: {model_name}")

    return registered_models


# ====================== Safe shutdown ======================
def shutdown_handler(signum=None, frame=None):
    """Shut every subsystem down once, then exit the process with status 0.

    Usable as a signal handler, an atexit hook, or a direct call; repeated
    calls return immediately.
    """
    global shutdown_initiated, progress_bar, log_listener

    if shutdown_initiated:
        return
    shutdown_initiated = True

    logger = logging.getLogger("Main.Shutdown")

    if signum:
        signal_name = {signal.SIGINT: "Ctrl+C", signal.SIGTERM: "终止信号"}.get(signum, signum)
        logger.warning(f"⚠️ 接收到关闭信号: {signal_name}")
    else:
        logger.warning("⚠️ 接收到关闭请求")

    # Ordered list of (display name, shutdown callable or None).
    shutdown_sequence = [
        ("命令监听器", command_listener.stop if command_listener else None),
        ("协调器", orchestrator.shutdown if orchestrator else None),
        ("模型管理器", model_manager.shutdown if model_manager else None),
        ("环境接口", environment_interface.shutdown if environment_interface else None),
        ("诊断系统", diagnostic_system.shutdown if diagnostic_system else None)
    ]

    for name, shutdown_func in shutdown_sequence:
        if shutdown_func:
            try:
                logger.info(f"🛑 正在关闭{name}...")
                shutdown_func()
                logger.info(f"✅ {name}已关闭")
            except Exception as e:
                logger.error(f"❌ 关闭{name}失败: {str(e)}", exc_info=True)

    # Best effort: closing the progress bar must never block shutdown.
    if progress_bar:
        try:
            progress_bar.close()
        except Exception:
            logger.debug("progress bar close failed", exc_info=True)

    # Stop the log listener.
    if log_listener:
        logger.info("🛑 正在停止日志监听器...")
        log_listener.stop()
        logger.info("✅ 日志监听器已停止")

    # The logging system is closed last.
    logging.shutdown()
    sys.exit(0)


# ====================== Default model ======================
def setup_default_model():
    """Load TEXT_BASE and install it as the orchestrator's default model.

    Returns:
        True on success, False when loading or installing failed.
    """
    global model_manager, orchestrator
    logger = logging.getLogger("Main.DefaultModel")

    try:
        model_name = "TEXT_BASE"
        logger.info(f"⏳ 开始加载默认模型: {model_name}")

        # Synchronous load.
        success, model = model_manager.load_model(model_name)

        if success and model is not None:
            # Prefer the orchestrator's set_model(); otherwise assign directly.
            if hasattr(orchestrator, 'set_model'):
                orchestrator.set_model(model_name, model)
                logger.info(f"✅ 默认模型已设置: {model_name}")
            else:
                orchestrator.default_model = model
                logger.info(f"✅ 默认模型已设置 (直接赋值): {model_name}")
            return True
        else:
            raise RuntimeError("模型加载返回失败状态")
    except Exception as e:
        logger.error(f"❌ 默认模型设置异常: {str(e)}", exc_info=True)
        return False


# ====================== Global exception handling ======================
def global_exception_handler(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception, write a crash report, then shut down.

    Installed as ``sys.excepthook``. KeyboardInterrupt is delegated to the
    interpreter's original hook. For every other exception a JSON crash
    report and a full stack trace are written under
    ``WORKSPACE_PATH/crash_reports`` and the process exits with status 1.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        # sys.excepthook is this very function, so delegate to the original
        # hook to avoid infinite recursion.
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return

    logger = logging.getLogger("Main.Critical")

    # Record the original exception.
    logger.critical("💥 未捕获的全局异常", exc_info=(exc_type, exc_value, exc_traceback))

    try:
        # Build the crash report.
        crash_report = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "system": {
                "python_version": sys.version,
                "platform": sys.platform,
                "executable": sys.executable
            },
            "exception": {
                "type": str(exc_type),
                "message": str(exc_value),
                "traceback": traceback.format_tb(exc_traceback)
            },
            "environment": {
                "workspace": str(WORKSPACE_PATH),
                "project_root": str(PROJECT_ROOT),
                "shutdown_initiated": shutdown_initiated
            },
            "modules": {
                "orchestrator": "initialized" if orchestrator else "not initialized",
                "model_manager": "initialized" if model_manager else "not initialized",
                "diagnostic_system": "initialized" if diagnostic_system else "not initialized"
            }
        }

        # Memory figures, when psutil is installed.
        try:
            import psutil
            process = psutil.Process()
            crash_report["memory"] = {
                "rss": process.memory_info().rss,
                "vms": process.memory_info().vms,
                "percent": process.memory_percent()
            }
        except ImportError:
            crash_report["memory"] = "psutil not available"

        # Save the crash report.
        crash_dir = WORKSPACE_PATH / "crash_reports"
        crash_dir.mkdir(parents=True, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        crash_file = crash_dir / f"crash_{timestamp}.json"

        with open(crash_file, "w", encoding="utf-8") as f:
            json.dump(crash_report, f, indent=2, ensure_ascii=False)
        logger.error(f"📝 崩溃报告已保存: {crash_file}")

        # Also save the complete stack trace.
        stack_file = crash_dir / f"stacktrace_{timestamp}.log"
        with open(stack_file, "w", encoding="utf-8") as f:
            traceback.print_exception(exc_type, exc_value, exc_traceback, file=f)
        logger.error(f"📝 堆栈跟踪已保存: {stack_file}")
    except Exception as e:
        logger.error(f"❌ 保存崩溃报告失败: {str(e)}")
        # Fall back to printing the exception.
        traceback.print_exception(exc_type, exc_value, exc_traceback)

    # Attempt a safe shutdown.
    logger.critical("🛑 尝试安全关闭系统...")
    try:
        shutdown_handler()
    except SystemExit:
        # shutdown_handler() ends with sys.exit(0); a crash must still be
        # reported through a non-zero exit status below.
        pass
    except Exception as e:
        logger.error(f"❌ 安全关闭失败: {str(e)}")
    sys.exit(1)


# ====================== Command handler ======================
def command_handler(command: str):
    """Handle one console command and return a response dict.

    Supported commands: help, status, models, load <model>, unload <model>,
    shutdown. Unknown input yields an ``error`` entry plus the list of
    available commands.
    """
    global orchestrator, model_manager, environment_interface, diagnostic_system
    logger = logging.getLogger("Main.Command")
    logger.info(f"📝 收到命令: {command}")

    # Commands are matched case-insensitively.
    cmd = command.strip().lower()

    if cmd == "help":
        return {
            "status": "success",
            "commands": {
                "help": "显示此帮助信息",
                "status": "显示系统状态",
                "models": "列出所有可用模型",
                "load [model]": "加载指定模型",
                "unload [model]": "卸载指定模型",
                "shutdown": "安全关闭系统"
            }
        }

    if cmd == "status":
        status = {
            "system": {
                "uptime": time.time() - start_time if 'start_time' in globals() else 0,
                "shutdown_initiated": shutdown_initiated
            }
        }
        if model_manager:
            status["model_manager"] = model_manager.get_status()
        if orchestrator:
            status["orchestrator"] = orchestrator.get_status()
        return status

    if cmd == "models":
        # List every registered model.
        if model_manager:
            return {
                "registered_models": model_manager.registered_models,
                "loaded_models": list(model_manager.loaded_models.keys())
            }
        return {"error": "模型管理器未初始化"}

    if cmd.startswith("load "):
        model_name = cmd[5:].upper()
        if not model_manager:
            return {"error": "模型管理器未初始化"}
        try:
            # load_model() is synchronous and returns (success, model), as at
            # every other call site in this file.
            success, _model = model_manager.load_model(model_name)
        except Exception as e:
            return {"error": f"加载模型失败: {str(e)}"}
        if success:
            return {"status": "success", "model": model_name, "message": "模型已加载"}
        return {"error": f"加载模型失败: {model_name}"}

    if cmd.startswith("unload "):
        model_name = cmd[7:].upper()
        if not model_manager:
            return {"error": "模型管理器未初始化"}
        try:
            model_manager.unload_model(model_name)
            return {"status": "success", "model": model_name, "message": "模型已卸载"}
        except Exception as e:
            return {"error": f"卸载模型失败: {str(e)}"}

    if cmd == "shutdown":
        # The main loop performs the actual shutdown when it sees this action.
        return {"action": "shutdown", "message": "系统将在5秒内关闭"}

    return {"error": "未知命令", "available_commands": ["help", "status", "models", "load", "unload", "shutdown"]}


# ====================== Entry point ======================
def main():
    """Start the system: logging, models, orchestrator, modules, command loop."""
    global orchestrator, model_manager, environment_interface, diagnostic_system
    global command_listener, progress_bar, log_listener, start_time

    # Record the start time (used by the "status" command).
    start_time = time.time()

    # Install the global exception handler.
    sys.excepthook = global_exception_handler

    # Clear the path cache.
    clean_path_cache()

    # Initialise the configuration system.
    CoreConfig.initialize()

    # Set up logging.
    logger, log_listener = setup_logging()

    # Register exit and signal handling.
    atexit.register(shutdown_handler)
    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)

    # Startup banner.
    logger.info("=" * 70)
    logger.info(f"🌟 {'AI系统 - 弹性星型架构':^60} 🌟")
    logger.info("=" * 70)
    logger.info(f"🚀 工作空间: {WORKSPACE_PATH}")
    logger.info(f"📂 项目目录: {PROJECT_ROOT}")
    logger.info(f"🐍 Python版本: {sys.version}")
    logger.info(f"🖥️ 操作系统: {sys.platform}")
    logger.info("-" * 70)

    # Startup progress bar.
    progress_bar = tqdm(
        total=100,
        desc="🚀 系统启动中",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
        dynamic_ncols=True
    )

    try:
        # --- Model manager ---
        model_manager_config = CoreConfig.get("model_manager", {})
        cache_dir = WORKSPACE_PATH / "model_cache"
        cache_dir.mkdir(parents=True, exist_ok=True)

        model_manager = ModelManager(
            config=model_manager_config,
            cache_dir=str(cache_dir),
            use_gpu=model_manager_config.get("use_gpu", True),
            max_models_in_memory=model_manager_config.get("max_models_in_memory", 3)
        )
        logger.info("✅ 模型管理器初始化完成")
        progress_bar.update(10)

        # --- Register all models ---
        progress_bar.set_description("📦 注册模型中")
        progress_bar.refresh()
        registered_models = register_models(model_manager, logger)

        # Validate the registered models.
        logger.info("🔍 验证已注册模型...")
        valid_models = model_manager.validate_registered_models(logger)
        logger.info(f"✅ 有效模型: {len(valid_models)}/{len(registered_models)}")

        # Without a valid model the system cannot start.
        if not valid_models:
            logger.critical("🛑 没有有效模型,系统无法启动")
            shutdown_handler()
            return

        logger.info(f"✅ 已注册模型: {registered_models}")
        progress_bar.update(30)
        progress_bar.set_description("🚀 系统启动中")

        # --- Star-topology orchestrator ---
        progress_bar.set_description("🧠 初始化协调器")
        progress_bar.refresh()
        cognitive_config = CoreConfig.get("cognitive_config", {})
        state_dir = WORKSPACE_PATH / "system_state"
        state_dir.mkdir(parents=True, exist_ok=True)
        cognitive_config["state_dir"] = str(state_dir)

        orchestrator = CognitiveOrchestrator(config=cognitive_config)
        logger.info("✅ 星型协调器初始化完成")
        progress_bar.update(15)
        progress_bar.set_description("🚀 系统启动中")

        # --- Default model ---
        setup_default_model()

        # --- Satellite modules ---
        progress_bar.set_description("🛰️ 初始化卫星模块")
        progress_bar.refresh()
        ei_config = CoreConfig.get("environment_interface", {})
        ds_config = CoreConfig.get("diagnostic_system", {})

        environment_interface = EnvironmentInterface(
            name="环境接口",
            coordinator=orchestrator,
            config=ei_config
        )
        diagnostic_system = DiagnosticSystem(
            name="诊断系统",
            coordinator=orchestrator,
            config=ds_config
        )

        # Pick whichever registration method the orchestrator offers.
        register_method = None
        if hasattr(orchestrator, 'register_module'):
            register_method = orchestrator.register_module
        elif hasattr(orchestrator, 'add_module'):
            register_method = orchestrator.add_module
        else:
            logger.warning("⚠️ 协调器没有标准注册方法,使用直接赋值")
            # Keep modules the orchestrator already created; only create the
            # dict when it is missing.
            if getattr(orchestrator, 'modules', None) is None:
                orchestrator.modules = {}

        # Register the modules.
        module_mapping = {
            "environment": environment_interface,
            "diagnostic": diagnostic_system
        }
        for name, module in module_mapping.items():
            if register_method:
                register_method(name, module)
            else:
                orchestrator.modules[name] = module

        logger.info("✅ 卫星模块初始化完成")
        progress_bar.update(15)
        progress_bar.set_description("🚀 系统启动中")
    except Exception as e:
        logger.error(f"❌ 系统初始化失败: {e}", exc_info=True)
        if progress_bar:
            progress_bar.close()
        sys.exit(1)

    # --- Connect modules ---
    try:
        progress_bar.set_description("🔗 连接模块中")
        progress_bar.refresh()

        if hasattr(orchestrator, 'connect_modules'):
            if orchestrator.connect_modules():
                logger.info("✅ 星型架构模块连接成功")
            else:
                logger.warning("⚠️ 模块连接存在问题,系统进入降级模式")
        else:
            logger.info("ℹ️ 协调器没有 connect_modules 方法,跳过模块连接")

        progress_bar.update(10)
        progress_bar.set_description("🚀 系统启动中")
    except Exception as e:
        logger.error(f"❌ 模块连接失败: {e}", exc_info=True)
        if progress_bar:
            progress_bar.close()
        sys.exit(1)

    # --- Load the base model ---
    base_model_key = "TEXT_BASE"
    base_model_info = CoreConfig.get("model_settings.TEXT_BASE", {})
    model_name = base_model_info.get("name", "Qwen2-7B")

    try:
        progress_bar.set_description(f"🤖 加载 {model_name}")
        progress_bar.refresh()

        # Synchronous load of the base model.
        success, model = model_manager.load_model(base_model_key)
        if success:
            logger.info(f"✅ 基础模型加载成功: {base_model_key}")
            # Prefer set_model(); otherwise assign directly.
            if hasattr(orchestrator, 'set_model'):
                orchestrator.set_model(base_model_key, model)
            else:
                orchestrator.default_model = model
        else:
            raise RuntimeError(f"模型加载返回失败状态: {base_model_key}")

        progress_bar.update(10)
        progress_bar.set_description("🚀 系统启动中")
    except Exception as e:
        logger.error(f"❌ 基础模型加载失败: {e}", exc_info=True)

        # Try every other registered model as a fallback.
        fallback_model = None
        for model_name in registered_models:
            if model_name == base_model_key:
                continue
            try:
                success, model = model_manager.load_model(model_name)
            except Exception as fallback_error:
                logger.warning(f"⚠️ 备用模型加载失败: {model_name} - {fallback_error}")
                continue
            if success:
                fallback_model = model_name
                logger.warning(f"⚠️ 使用备用模型: {fallback_model}")
                if hasattr(orchestrator, 'set_model'):
                    orchestrator.set_model(fallback_model, model)
                else:
                    orchestrator.default_model = model
                break

        if not fallback_model:
            logger.critical("🛑 没有可用模型,系统无法启动")
            shutdown_handler()
            return

    # --- Start the command listener ---
    try:
        progress_bar.set_description("📡 启动命令监听")
        progress_bar.refresh()

        command_listener = start_command_listener(
            command_handler=command_handler,
            shutdown_handler=shutdown_handler
        )
        logger.info("✅ 命令监听器已启动")
        progress_bar.update(10)
    except Exception as e:
        logger.error(f"❌ 命令监听器启动失败: {e}", exc_info=True)
        if progress_bar:
            progress_bar.close()
        shutdown_handler()
        return

    # --- Finish the startup progress bar ---
    progress_bar.set_description("🎉 系统准备就绪")
    # Fill whatever remains instead of a fixed 10, so the bar ends exactly at
    # its total regardless of which startup path was taken.
    progress_bar.update(max(0, progress_bar.total - progress_bar.n))
    time.sleep(0.5)  # let the user see the completed state
    progress_bar.close()

    logger.info("🌟 系统准备就绪! 输入命令开始交互 ('help' 查看命令列表)")
    logger.info("-" * 70)

    # --- Main loop ---
    try:
        while True:
            time.sleep(0.1)  # keep CPU usage low

            # Drain the command queue.
            if command_listener and hasattr(command_listener, 'command_queue'):
                while not command_listener.command_queue.empty():
                    command = command_listener.command_queue.get()
                    response = command_handler(command)

                    # Shutdown request.
                    if isinstance(response, dict) and response.get("action") == "shutdown":
                        shutdown_handler()
                        return

                    # Print the response.
                    if response:
                        print("\n" + ("-" * 50))
                        if isinstance(response, dict):
                            response_str = json.dumps(response, indent=2, ensure_ascii=False)
                            print(f"系统响应:\n{response_str}")
                        else:
                            print(f"系统响应: {str(response)}")
                        print("-" * 50 + "\n")
    except Exception as e:
        logger.critical(f"🔥 主循环错误: {e}", exc_info=True)
        shutdown_handler()


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"🔥 主函数异常: {e}")
        traceback.print_exc()
        sys.exit(1)


# ===========================================================================
# 2. E:\AI_System\agent\cognitive_architecture.py
# ===========================================================================
import sys
import logging
import json
import time
import threading
from agent.base_module import UnifiedCognitiveModule
from core.module_manager import ModuleManager
from core.message_bus import MessageBus
from core.message import Message, MessageType
from agent.environment_interface import EnvironmentInterface
from agent.health_monitor import HealthMonitor


class CognitiveSystem(UnifiedCognitiveModule):
    """Core cognitive system: command dispatch, module management, monitoring."""

    VERSION = "1.6.0"
    DEFAULT_CONFIG = {
        "reasoning_depth": 3,
        "memory_limit": 1000,
        "auto_reflection": True,
        "learning_threshold": 0.8,
        "error_recovery": True,
        "max_concurrent_tasks": 5,
        "module_auto_recovery": True
    }

    def __init__(self, name: str, model_manager, config: dict = None):
        # Merge user settings over the defaults into a fresh dict, so the
        # class-level DEFAULT_CONFIG is never shared or mutated and keys such
        # as "memory_limit" are always present.
        merged_config = {**self.DEFAULT_CONFIG, **(config or {})}

        # The system acts as its own coordinator.
        super().__init__(
            name=name,
            coordinator=self,
            config=merged_config
        )

        # Message bus instance.
        self.message_bus = MessageBus()

        # Model manager.
        self.model_manager = model_manager

        # Module manager.
        self.module_manager = ModuleManager(coordinator=self)

        # Command word -> handler.
        self.command_handlers = {
            "help": self.handle_help,
            "hi": self.handle_greeting,
            "hello": self.handle_greeting,
            "你好": self.handle_greeting,
            "在吗": self.handle_greeting,
            "status": self.handle_status,
            "mode": self.handle_mode,
            "models": self.handle_models,
            "model": self.handle_models,
            "diagnose": self.handle_diagnose,
            "modules": self.handle_modules,
            "load": self.handle_load_module,
            "unload": self.handle_unload_module,
            "reload": self.handle_reload_module,
            "exit": self.handle_exit,
            "quit": self.handle_exit
        }

        # System state.
        self.mode = "TASK_EXECUTION"
        self.memory = {
            "short_term": [],
            "long_term": {},
            "last_accessed": time.time()
        }
        self.start_time = time.time()

        # Read by the monitor thread as its loop condition.
        self.running = True

        # Set to True once initialize() has completed.
        self.initialized = False

        # Start the module monitor thread.
        self.monitor_thread = threading.Thread(target=self._monitor_modules, daemon=True)
        self.monitor_thread.start()

    def initialize(self) -> bool:
        """Register and load the core modules; return True on success."""
        try:
            self.logger.info(f"✅ 认知系统初始化开始 (版本 {self.VERSION})")

            # Register the core modules.
            self.module_manager.register_module("EnvironmentInterface", EnvironmentInterface)
            self.module_manager.register_module("HealthMonitor", HealthMonitor)

            # Load the environment interface.
            env_config = self.config.get("environment", {})
            if self.module_manager.load_module("EnvironmentInterface", env_config):
                self.logger.info("✅ 环境接口模块加载成功")
            else:
                self.logger.error("❌ 环境接口模块加载失败")

            # Load the health monitor.
            health_config = self.config.get("health", {"interval": 15})
            if self.module_manager.load_module("HealthMonitor", health_config):
                self.logger.info("✅ 健康监控模块加载成功")
            else:
                self.logger.error("❌ 健康监控模块加载失败")

            self.initialized = True
            self.logger.info(f"✅ 认知系统初始化完成 (模式: {self.mode})")
            return True
        except Exception as e:
            self.logger.error(f"❌ 认知系统初始化失败: {str(e)}", exc_info=True)
            return False

    def shutdown(self) -> bool:
        """Stop monitoring, unload all modules and release resources."""
        try:
            self.logger.info("🛑 关闭认知系统开始")

            # Ask the monitor loop to stop.
            self.running = False

            # Wait (bounded) for the monitor thread.
            if self.monitor_thread.is_alive():
                self.monitor_thread.join(timeout=5.0)

            # Unload every module.
            self.module_manager.unload_all_modules()

            # Release resources.
            self.message_bus.shutdown()
            self.module_manager = None
            self.initialized = False

            self.logger.info("✅ 认知系统已完全关闭")
            return True
        except Exception as e:
            self.logger.error(f"❌ 关闭系统失败: {str(e)}", exc_info=True)
            return False

    def process(self, input_data: dict) -> dict:
        """Dispatch *input_data* (a command string or a dict with "command")."""
        try:
            self.logger.debug(f"🧠 处理输入: {type(input_data)}")

            if isinstance(input_data, str):
                return self.process_command(input_data)
            elif isinstance(input_data, dict):
                command = input_data.get("command")
                if command:
                    return self.process_command(command)
                return {"error": "无效的输入格式"}
            else:
                return {"error": "不支持的输入类型"}
        except Exception as e:
            self.logger.error(f"处理输入失败: {str(e)}", exc_info=True)
            return {"error": f"处理失败: {str(e)}"}

    def process_command(self, command: str) -> dict:
        """Run one user command and return the result as a dict.

        The first word selects the handler; the remainder is passed as the
        argument. An argument that parses as a JSON object or array is passed
        in parsed form. Unknown commands go to ``handle_default`` with the
        full command text.
        """
        try:
            self.logger.info(f"🧠 处理命令: {command}")

            # Split into command word and argument.
            parts = command.split(maxsplit=1)
            if not parts:
                # Empty or whitespace-only input.
                return {"error": "无效的输入格式"}
            cmd = parts[0].lower()
            arg = parts[1] if len(parts) > 1 else ""

            handler = self.command_handlers.get(cmd)
            if handler is None:
                # The default handler receives the whole command, not just the
                # text after the first word.
                result = self.handle_default(command)
            else:
                # Only structured JSON replaces the raw string; scalars such
                # as "123" or "true" stay strings so handlers can call string
                # methods on them.
                try:
                    parsed = json.loads(arg)
                except json.JSONDecodeError:
                    parsed = None
                if isinstance(parsed, (dict, list)):
                    arg = parsed
                result = handler(arg)

            # Always return a dict.
            if isinstance(result, str):
                return {"response": result}
            elif isinstance(result, dict):
                return result
            else:
                return {"response": str(result)}
        except Exception as e:
            self.logger.error(f"命令处理失败: {str(e)}", exc_info=True)
            return {"error": f"处理命令时出错: {str(e)}"}

    # === Command handlers ===
    def handle_greeting(self, arg: str) -> str:
        """Return a greeting."""
        return f"你好,我是{self.name}!有什么可以帮您?"

    def handle_help(self, arg: str) -> str:
        """Return the help text."""
        # NOTE(review): the original line breaks of this literal were lost in
        # the paste; the layout below is a reconstruction.
        help_text = """
=== 高级命令系统 ===
基础命令:
  help - 显示此帮助信息
  exit/quit - 退出系统
  status - 查看系统状态
  mode [mode]- 切换工作模式 (reflect, task, learn)

系统控制:
  models - 显示已加载模型
  model - 同 models
  diagnose - 执行系统诊断
  config [key] [value] - 修改配置
  modules - 查看模块状态
  load [module] - 加载模块
  unload [module] - 卸载模块
  reload [module] - 重新加载模块

多行输入:
  输入多行命令时,在最后一行以 ;; 结束
"""
        return help_text

    def handle_status(self, arg: str) -> str:
        """Return a textual status summary."""
        return (
            f"系统状态:\n"
            f"- 认知系统: {self.name} v{self.VERSION}\n"
            f"- 当前模式: {self.mode}\n"
            f"- 最后访问: {time.ctime(self.memory['last_accessed'])}\n"
            f"- 短期记忆: {len(self.memory['short_term'])}/{self.config['memory_limit']} 条\n"
            f"- 运行时间: {time.time() - self.start_time:.1f}秒"
        )

    def handle_mode(self, arg: str) -> str:
        """Switch the working mode (reflect, task or learn)."""
        if not arg:
            return "请指定模式: reflect, task, learn"

        mode_map = {
            "reflect": "SELF_REFLECTION",
            "task": "TASK_EXECUTION",
            "learn": "LEARNING"
        }

        new_mode = mode_map.get(arg.lower(), "")
        if new_mode:
            self.set_mode(new_mode)
            return f"已切换到 {new_mode} 模式"
        return f"❌ 无效模式: {arg} (可用选项: reflect, task, learn)"

    def handle_models(self, arg: str) -> str:
        """List registered models and whether each one is loaded."""
        try:
            # Access the model manager's attributes defensively.
            loaded_models = []
            if hasattr(self.model_manager, 'loaded_models'):
                loaded_models = list(self.model_manager.loaded_models.keys())

            registry = getattr(self.model_manager, '_persistent_registry', {})
            registered_models = list(registry.keys())

            models_info = []
            for name in registered_models:
                status = "✅ 已加载" if name in loaded_models else "❌ 未加载"
                path_info = registry.get(name, {}).get('path', '未知路径')
                models_info.append(f"- {name}: {path_info} ({status})")

            return "已配置模型:\n" + "\n".join(models_info) if models_info else "❌ 没有找到已配置的模型"
        except Exception as e:
            return f"❌ 获取模型信息失败: {str(e)}"

    def handle_diagnose(self, arg: str) -> str:
        """Return a diagnostic report built from the health monitor."""
        try:
            response = "系统诊断报告:\n"

            # Module status.
            loaded_modules = self.module_manager.list_modules() if self.module_manager else []
            response += f"- 已加载模块: {len(loaded_modules)}个\n"

            # Health monitor report, when the module is loaded.
            health_monitor = self.module_manager.get_module("HealthMonitor") if self.module_manager else None
            if health_monitor:
                health_report = health_monitor.get_health_status()
                for module, status in health_report.items():
                    # .get() so that one entry lacking "status" does not
                    # abort the whole report.
                    response += f"- {module}: {status.get('status', '')} ({status.get('message', '')})\n"
            else:
                response += "- 健康监控模块未加载\n"

            return response
        except Exception as e:
            return f"❌ 执行诊断失败: {str(e)}"

    def handle_modules(self, arg: str) -> str:
        """List registered and loaded modules."""
        if not self.module_manager:
            return "❌ 模块管理器未初始化"

        loaded_modules = self.module_manager.list_modules()
        registered_modules = list(self.module_manager.module_classes.keys())

        response = "已注册模块:\n" + "\n".join([f"- {name}" for name in registered_modules])
        response += "\n\n已加载模块:\n" + "\n".join([f"- {name}" for name in loaded_modules])
        return response

    def handle_load_module(self, arg: str) -> str:
        """Load the module named *arg*."""
        if not arg:
            return "请指定要加载的模块名称"
        if not self.module_manager:
            return "❌ 模块管理器未初始化"

        module_config = self.config.get("modules", {}).get(arg, {})
        success = self.module_manager.load_module(arg, module_config)
        return f"✅ 模块 {arg} 加载成功" if success else f"❌ 模块 {arg} 加载失败"

    def handle_unload_module(self, arg: str) -> str:
        """Unload the module named *arg*."""
        if not arg:
            return "请指定要卸载的模块名称"
        if not self.module_manager:
            return "❌ 模块管理器未初始化"

        success = self.module_manager.unload_module(arg)
        return f"✅ 模块 {arg} 卸载成功" if success else f"❌ 模块 {arg} 卸载失败"

    def handle_reload_module(self, arg: str) -> str:
        """Reload the module named *arg*."""
        if not arg:
            return "请指定要重载的模块名称"
        if not self.module_manager:
            return "❌ 模块管理器未初始化"

        module_config = self.config.get("modules", {}).get(arg, {})
        success = self.module_manager.reload_module(arg, module_config)
        return f"✅ 模块 {arg} 重载成功" if success else f"❌ 模块 {arg} 重载失败"

    def handle_exit(self, arg: str) -> dict:
        """Return the shutdown action."""
        return {"action": "shutdown", "message": "正在关闭系统..."}

    def handle_default(self, command: str) -> str:
        """Fallback for unknown commands."""
        return f"正在处理您的请求: {command}..."

    # === Internal helpers ===
    def _monitor_modules(self):
        """Background loop: poll the health monitor every 10 s and reload
        modules whose status is not "healthy" (when auto recovery is on)."""
        while self.running:
            time.sleep(10)
            try:
                if self.module_manager and self.initialized:
                    health_monitor = self.module_manager.get_module("HealthMonitor")
                    if health_monitor:
                        status_report = health_monitor.get_health_status()
                        self.logger.debug(f"模块健康报告: {status_report}")

                        # Auto-recover unhealthy modules.
                        if self.config.get("module_auto_recovery", True):
                            for module_name, status in status_report.items():
                                if status.get("status") != "healthy":
                                    self.logger.warning(f"⚠️ 模块 {module_name} 状态异常,尝试恢复...")
                                    self.module_manager.reload_module(module_name)
            except Exception as e:
                self.logger.error(f"模块监控错误: {str(e)}")

    def get_current_mode(self):
        """Return the current mode."""
        return self.mode

    def set_mode(self, new_mode: str):
        """Switch to *new_mode*; return True when it is a valid mode."""
        valid_modes = ["SELF_REFLECTION", "TASK_EXECUTION", "LEARNING"]
        if new_mode in valid_modes:
            self.mode = new_mode
            self.logger.info(f"切换到 {new_mode} 模式")
            return True
        else:
            self.logger.warning(f"无效模式: {new_mode}")
            return False

    def get_health_status(self) -> dict:
        """Return a health status report."""
        return {
            "status": "running" if self.running else "stopped",
            "version": self.VERSION,
            "mode": self.mode,
            "memory_usage": f"{len(self.memory['short_term'])}/{self.config['memory_limit']}",
            "last_accessed": self.memory["last_accessed"],
            "modules": self.module_manager.list_modules() if self.module_manager else []
        }


# ===========================================================================
# 3. E:\AI_System\core\cognitive_orchestrator.py
# ===========================================================================
import os
import json
import time
import logging
from pathlib import Path
from agent.model_manager import ModelManager
from agent.environment_interface import EnvironmentInterface
from agent.diagnostic_system import DiagnosticSystem


class CognitiveOrchestrator:
    """Hub of the star architecture: owns the satellite modules and routes
    commands to them."""

    def __init__(self, config: dict):
        # Tolerate a missing/empty config.
        self.name = config.get("name", "CognitiveOrchestrator") if config else "CognitiveOrchestrator"
        self.logger = logging.getLogger("Orchestrator")
        self.config = config if config else {}
        self.state_dir = self._get_state_dir()

        # Satellite modules.
        self.modules = {}
        self._initialize_modules()
        self.fallback_mode = False

        # Snapshot of the modules taken at construction time.
        self.module_access = {
            "environment": self.modules.get("environment"),
            "diagnostic": self.modules.get("diagnostic")
        }

        self.logger.info(f"✅ 星型协调器初始化完成: {self.name}")

    def _get_state_dir(self) -> Path:
        """Return the state directory, creating it (or a fallback) if needed."""
        state_dir = Path(self.config.get("state_dir", "E:/AI_System/system_state"))
        if not state_dir.exists():
            try:
                state_dir.mkdir(parents=True, exist_ok=True)
                self.logger.info(f"📁 创建状态目录: {state_dir}")
            except Exception as e:
                self.logger.error(f"❌ 创建状态目录失败: {e}")
                # Fall back to a temporary directory.
                fallback_dir = Path("E:/temp/system_state")
                fallback_dir.mkdir(parents=True, exist_ok=True)
                return fallback_dir
        return state_dir

    def _initialize_modules(self):
        """Create the satellite modules; a failing module never aborts
        construction."""
        # Environment interface.
        try:
            self.modules["environment"] = EnvironmentInterface(
                name="EnvironmentInterface",
                coordinator=self,
                config=self.config.get("environment", {})
            )
            self.logger.info("✅ 环境接口初始化成功")
        except Exception as e:
            self.logger.error(f"❌ 环境接口初始化失败: {str(e)}")
            # Try an empty-config placeholder; if that fails too, record None
            # (every consumer in this class skips None modules).
            try:
                self.modules["environment"] = EnvironmentInterface(
                    name="DummyEnvironment",
                    coordinator=self,
                    config={}
                )
            except Exception as fallback_error:
                self.logger.error(f"❌ 环境接口初始化失败: {str(fallback_error)}")
                self.modules["environment"] = None

        # Diagnostic system.
        try:
            self.modules["diagnostic"] = DiagnosticSystem(
                name="DiagnosticSystem",
                coordinator=self,
                config=self.config.get("diagnostic", {})
            )
            self.logger.info("✅ 诊断系统初始化成功")
        except Exception as e:
            self.logger.error(f"❌ 诊断系统初始化失败: {str(e)}")
            self.modules["diagnostic"] = None  # kept as None

    def connect_modules(self):
        """Hand each module a reference to this orchestrator.

        Returns:
            True when at least one module connected; otherwise fallback mode
            is activated and False is returned.
        """
        success_count = 0
        try:
            self.logger.info("🔗 开始连接模块到协调器...")
            for module_name, module in self.modules.items():
                if module is None:
                    self.logger.warning(f"⚠️ 跳过空模块: {module_name}")
                    continue

                try:
                    module.set_orchestrator({
                        "name": self.name,
                        "instance": self
                    })
                    self.logger.info(f"✅ 模块连接成功: {module_name}")
                    success_count += 1
                except Exception as e:
                    self.logger.error(f"❌ 模块连接失败: {module_name} - {str(e)}", exc_info=True)

            if success_count > 0:
                self.logger.info(f"✅ {success_count}个模块连接完成")
                return True
            else:
                self.logger.critical("🛑 所有模块连接均失败")
                self.activate_fallback_mode()
                return False
        except Exception as e:
            self.logger.critical(f"❌ 模块连接过程发生未知错误: {e}", exc_info=True)
            self.activate_fallback_mode()
            return False

    def activate_fallback_mode(self):
        """Enter fault-isolation mode and disconnect every module that
        exposes a ``disconnect`` method."""
        self.fallback_mode = True
        self.logger.warning("⚠️ 进入故障隔离模式,核心功能保持运行")

        for name, module in self.modules.items():
            if module is None:
                continue

            try:
                if hasattr(module, "disconnect") and callable(module.disconnect):
                    module.disconnect()
                    self.logger.warning(f"🔌 模块已隔离: {name}")
            except Exception as e:
                self.logger.error(f"❌ 隔离模块失败: {name} - {e}")

    def is_module_available(self, module_name: str) -> bool:
        """Return True when the module exists, fallback mode is off and the
        module (if it offers ``is_healthy``) reports healthy."""
        module = self.modules.get(module_name)
        if module is None:
            return False

        try:
            healthy = not self.fallback_mode
            if hasattr(module, "is_healthy") and callable(module.is_healthy):
                healthy = healthy and module.is_healthy()
            return healthy
        except Exception:
            # A failing health probe counts as unavailable.
            return False

    def process_command(self, command: str):
        """Route ``"<module>:<command>"`` to the named module.

        ``status``/``health`` and ``exit``/``quit``/``shutdown`` are handled
        here as system-level commands.
        """
        try:
            # System-level commands.
            if command.lower() in ["status", "health"]:
                return self.get_system_status()
            if command.lower().startswith(("exit", "quit", "shutdown")):
                return {"action": "shutdown", "message": "系统关闭中..."}

            # Split off the module prefix.
            parts = command.split(":", 1)
            if len(parts) < 2:
                return {"error": "命令格式错误,应为 '模块名:命令内容'"}

            module_name = parts[0].lower()
            command_content = parts[1].strip()

            # Availability check.
            if not self.is_module_available(module_name):
                return {"error": f"模块'{module_name}'不可用或已隔离"}

            module = self.modules.get(module_name)
            if module is None:
                return {"error": f"未知模块: {module_name}"}

            # Execute inside a safety wrapper.
            try:
                if hasattr(module, "execute") and callable(module.execute):
                    return module.execute(command_content)
                else:
                    return {"error": f"模块'{module_name}'不支持命令执行"}
            except Exception as e:
                self.logger.error(f"❌ 模块执行错误: {module_name} - {e}", exc_info=True)
                return {"error": f"模块执行出错: {str(e)}"}
        except Exception as e:
            self.logger.error(f"❌ 命令处理错误: {e}", exc_info=True)
            return {"error": f"内部处理错误: {str(e)}"}

    def get_system_status(self):
        """Return a detailed status dict covering every module."""
        status = {
            "system_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "architecture": "star",
            "status": "normal" if not self.fallback_mode else "degraded",
            "orchestrator": self.name,
            "modules": {}
        }

        for name, module in self.modules.items():
            module_status = {"name": name}

            if module is None:
                module_status["status"] = "not_initialized"
            else:
                try:
                    if hasattr(module, "get_status") and callable(module.get_status):
                        module_status.update(module.get_status())
                    else:
                        module_status["status"] = "active"
                        module_status["details"] = "Status method not implemented"

                    # Health flag.
                    if hasattr(module, "is_healthy") and callable(module.is_healthy):
                        module_status["healthy"] = module.is_healthy()
                    else:
                        module_status["healthy"] = True
                except Exception as e:
                    module_status["status"] = "error"
                    module_status["error"] = str(e)

            status["modules"][name] = module_status

        return status

    def get_status(self):
        """Return the orchestrator's own summary status."""
        return {
            "status": "running" if not self.fallback_mode else "degraded",
            "name": self.name,
            "modules": list(self.modules.keys()),
            "fallback_mode": self.fallback_mode,
            "state_dir": str(self.state_dir)
        }

    def save_state(self):
        """Write the system status to ``system_state.json``; return success."""
        try:
            state_file = self.state_dir / "system_state.json"
            state_data = {
                "timestamp": time.time(),
                "human_time": time.strftime("%Y-%m-%d %H:%M:%S"),
                "orchestrator": self.name,
                "status": self.get_system_status()
            }

            # Write to a temporary file first so an interrupted write cannot
            # corrupt the existing state file.
            temp_file = state_file.with_suffix(".tmp")
            with open(temp_file, "w", encoding="utf-8") as f:
                json.dump(state_data, f, indent=2, ensure_ascii=False)

            # os.replace() overwrites the destination in one step; the former
            # remove-then-rename left a window with no state file at all.
            os.replace(temp_file, state_file)

            self.logger.info(f"💾 系统状态已保存: {state_file}")
            return True
        except Exception as e:
            self.logger.error(f"❌ 保存状态失败: {e}", exc_info=True)
            return False

    def shutdown(self):
        """Shut the orchestrator down safely."""
        self.logger.info("🛑 开始关闭协调器...")
        shutdown_success = True

        # 1. Save state.
        if not self.save_state():
            shutdown_success = False

        # 2. (The pasted source is cut off at this point; the remaining
        #    shutdown steps are not visible and are intentionally not
        #    reconstructed here.)
关闭模块(从外向内) shutdown_order = ["diagnostic", "environment"] for name in shutdown_order: module = self.modules.get(name) if module is None: continue try: if hasattr(module, "shutdown") and callable(module.shutdown): self.logger.info(f"🔌 正在关闭模块: {name}") module.shutdown() except Exception as e: self.logger.error(f"❌ 关闭模块失败: {name} - {e}", exc_info=True) shutdown_success = False # 3. 清理资源 try: self.modules.clear() self.logger.info("🧹 资源清理完成") except: pass if shutdown_success: self.logger.info("✅ 协调器已安全关闭") else: self.logger.warning("⚠️ 协调器关闭完成,但部分操作失败") # === 模块访问方法 === def get_module(self, name: str): """获取模块实例(安全访问)""" return self.modules.get(name, None) def get_modules(self): """获取所有模块实例""" return {k: v for k, v in self.modules.items() if v is not None}
最新发布
09-01
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值