<think>我们正在重构自主智能体(AutonomousAgent)类,主要目标是:
1. 使用统一的配置系统(system_config)来管理路径和其他配置项。
2. 增强环境管理器的集成,确保智能体能够感知和响应环境变化。
3. 优化子系统管理,包括熔断机制、状态持久化和恢复策略。
重构重点:
- 在初始化方法中,使用system_config获取配置路径(如CONFIG_DIR, MODEL_CACHE_DIR, LOG_DIR)。
- 在验证环境方法中,使用system_config的路径,并确保目录存在。
- 在环境监控方法中,使用环境管理器(如果已设置)来获取环境状态。
我们将修改以下方法:
1. __init__: 确保使用system_config来初始化路径相关配置。
2. _load_environment: 使用system_config.CONFIG_DIR获取.env文件的路径。
3. verify_environment: 使用system_config.MODEL_CACHE_DIR和system_config.LOG_DIR检查目录。
4. _monitor_environment: 优先使用环境管理器获取环境状态。
此外,我们还将对代码进行一些清理,确保符合PEP8标准,并添加必要的注释。
注意:由于代码较长,说明部分只重点讲解修改后的关键部分。</think>我将重构自主智能体类,使其更模块化、健壮且易于维护。以下先给出完整的重构代码,再逐项说明关键重构点:
```python
# agent/autonomous_agent.py
import os
import sys
import time
import json
import logging
import traceback
import threading
import platform
import psutil
from pathlib import Path
from typing import Any, Dict, Optional, Callable
from concurrent.futures import ThreadPoolExecutor, Future
# 确保项目根目录在 sys.path 中
BASE_DIR = Path(__file__).resolve().parent.parent.parent # 指向 E:\AI_System
if str(BASE_DIR) not in sys.path:
sys.path.insert(0, str(BASE_DIR))
# 导入核心模块
from core.config import system_config
from core.exceptions import DependencyError, SubsystemFailure, ConfigurationError
from core.dependency_manager import DependencyManager
from core.metrics import MetricsCollector
from core.circuit_breaker import CircuitBreaker
from core.subsystem_registry import SubsystemRegistry
# 全局线程池
executor = ThreadPoolExecutor(max_workers=system_config.MAX_WORKERS)
class AutonomousAgent:
def __init__(self):
    """Build the agent and run every start-up phase in order.

    Creates the logger, the bookkeeping attributes, the metrics collector,
    an empty circuit-breaker map and the subsystem registry, then runs the
    five initialization phases one after another.

    Raises:
        RuntimeError: if any phase raises; the original exception is
            logged and attached as the cause.
    """
    self.logger = self._setup_logger()
    self.logger.info("🚀 初始化自主智能体核心模块...")
    self._running = False
    self._background_thread = None

    # Start-up bookkeeping.
    self.initialization_steps = []
    self._last_env_check = 0
    self._initialization_time = time.time()
    self.metrics = MetricsCollector()

    # Per-subsystem circuit breakers, filled in during initialization.
    self.circuit_breakers = {}
    self.subsystem_registry = SubsystemRegistry()
    # Environment manager; injected later through set_environment().
    self.environment = None

    try:
        # (step label, callable) pairs executed strictly in this order.
        phases = (
            ("验证配置", self._validate_configuration),
            ("加载环境变量", self._load_environment),
            ("验证环境", self.verify_environment),
            ("初始化核心组件", self._initialize_core_components),
            ("初始化子系统", self._initialize_subsystems),
        )
        for label, run_phase in phases:
            self._record_step(label)
            run_phase()

        elapsed = time.time() - self._initialization_time
        self.logger.info(f"✅ 自主智能体初始化完成 (耗时: {elapsed:.2f}秒)")
        self.logger.info(f"初始化步骤: {', '.join(self.initialization_steps)}")
    except Exception as e:
        self.logger.exception(f"❌ 智能体初始化失败: {e}")
        self.logger.error(f"堆栈跟踪:\n{traceback.format_exc()}")
        raise RuntimeError(f"智能体初始化失败: {e}") from e
def _setup_logger(self) -> logging.Logger:
    """Return the 'AutonomousAgent' logger, attaching handlers only once.

    ``logging.getLogger`` hands back the same logger object for the same
    name, so unconditionally adding handlers (as the previous version did)
    made every additional agent instance duplicate each log line and open
    another handle on the log file. Handlers are now attached only when the
    logger has none yet.

    Returns:
        The configured logger. It writes to the console and to
        ``<LOG_DIR>/autonomous_agent.log`` and does not propagate to the
        root logger.
    """
    logger = logging.getLogger('AutonomousAgent')
    logger.setLevel(system_config.LOG_LEVEL)
    logger.propagate = False

    if logger.handlers:
        # Already configured by an earlier instance; reuse as-is.
        return logger

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    console_handler = logging.StreamHandler()
    console_handler.setLevel(system_config.LOG_LEVEL)
    console_handler.setFormatter(formatter)

    # Make sure the log directory exists before opening the file handler.
    log_file = Path(system_config.LOG_DIR) / 'autonomous_agent.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(system_config.LOG_LEVEL)
    file_handler.setFormatter(formatter)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    return logger
def _validate_configuration(self):
    """Fail fast when a mandatory attribute is absent from system_config.

    Only the presence of each attribute is checked, not its value.

    Raises:
        ConfigurationError: listing every missing attribute name.
    """
    mandatory = (
        'LOG_DIR', 'CONFIG_DIR', 'MODEL_CACHE_DIR',
        'MAX_WORKERS', 'AGENT_RESPONSE_TIMEOUT',
    )
    absent = [key for key in mandatory if not hasattr(system_config, key)]
    if absent:
        raise ConfigurationError(f"缺失关键配置项: {', '.join(absent)}")
def _record_step(self, step_name: str):
"""记录初始化步骤"""
self.initialization_steps.append(step_name)
self.logger.info(f"⏳ 步骤 {len(self.initialization_steps)}: {step_name}")
def _load_environment(self):
    """Load variables from ``<CONFIG_DIR>/.env`` when the file exists.

    ``CONFIG_DIR`` is wrapped in ``Path`` (as ``_setup_logger`` already does
    for ``LOG_DIR``) so a plain string in the configuration no longer makes
    the ``/`` operator raise ``TypeError``.

    A missing file or a missing ``python-dotenv`` package is only logged as
    a warning; neither aborts start-up.
    """
    env_path = Path(system_config.CONFIG_DIR) / ".env"
    if not env_path.exists():
        self.logger.warning(f"⚠️ 环境变量文件不存在: {env_path}")
        return

    try:
        # Optional dependency: imported lazily so its absence is not fatal.
        from dotenv import load_dotenv
    except ImportError:
        self.logger.warning("dotenv包未安装,跳过环境变量加载")
        return

    load_dotenv(env_path)
    self.logger.info(f"✅ 已加载环境变量文件: {env_path}")
def set_environment(self, env_manager):
    """Attach (or detach) the external environment manager.

    Fixes two problems of the previous version: it logged a successful
    connection even when *env_manager* was falsy, and every call registered
    another "环境监控" task, so swapping managers multiplied the monitoring
    work. The monitoring task is now registered at most once per agent;
    it always reads ``self.environment`` at run time, so replacing the
    manager needs no new task.

    Args:
        env_manager: object queried by ``_monitor_environment`` (which calls
            its ``get_state()`` when present), or a falsy value to detach.
    """
    self.environment = env_manager
    if not env_manager:
        self.logger.warning("⚠️ 未提供环境管理器,跳过环境监控任务注册")
        return

    self.logger.info("✅ 已连接环境管理器")

    # The flag is created lazily because __init__ does not define it.
    if getattr(self, '_env_monitor_registered', False):
        return
    self.subsystem_registry.register_task(
        "环境监控",
        self._monitor_environment,
        interval=system_config.get('ENVIRONMENT_MONITOR_INTERVAL', 5.0)
    )
    self._env_monitor_registered = True
def start(self):
    """Launch the background task thread unless the agent already runs."""
    if self._running:
        self.logger.warning("智能体已在运行中")
        return
    self._start_background_tasks()
    self.logger.info("🏁 智能体后台任务已启动")
def _start_background_tasks(self):
"""启动后台任务线程"""
if self._running:
return
self._running = True
self._background_thread = threading.Thread(
target=self._background_task_loop,
daemon=True,
name="AutonomousAgentBackgroundTasks"
)
self._background_thread.start()
self.logger.info("✅ 后台任务线程已启动")
def _background_task_loop(self):
    """Run the registry's periodic tasks until ``self._running`` is cleared.

    After each pass the loop sleeps for ``AGENT_TASK_INTERVAL`` minus the
    time the pass took (never less than 0.1 s). When a pass raises, the
    error is logged and counted, and the loop backs off for up to 30 s.

    The back-off used to be a single ``time.sleep(30)``, which kept the
    thread alive long after ``shutdown()`` gave up on its 5-second join.
    It is now taken in short slices that re-check the running flag.
    """
    self.logger.info("🔄 后台任务循环启动")
    while self._running:
        try:
            started = time.time()
            self.subsystem_registry.run_periodic_tasks()
            elapsed = time.time() - started
            time.sleep(max(0.1, system_config.AGENT_TASK_INTERVAL - elapsed))
        except Exception as e:
            self.logger.error(f"后台任务错误: {str(e)}")
            self.metrics.record_error('background_task')
            # Longer pause after an error, but stay responsive to shutdown.
            resume_at = time.time() + 30
            while self._running and time.time() < resume_at:
                time.sleep(0.5)
def verify_environment(self):
    """Check that required modules import and working directories exist.

    The accompanying design notes state that this step uses
    ``system_config.MODEL_CACHE_DIR`` and ``system_config.LOG_DIR`` and
    makes sure those directories exist, but the previous body only checked
    imports. The directory step is added here.

    Raises:
        DependencyError: when one or more required modules cannot be
            imported; the message lists all of them.
        OSError: when a required directory cannot be created.
    """
    required_modules = [
        'os', 'sys', 'logging', 'flask', 'werkzeug',
        'numpy', 'transformers', 'torch', 'psutil'
    ]
    missing = []
    for mod in required_modules:
        try:
            __import__(mod)
        except ImportError:
            missing.append(mod)

    if missing:
        error_msg = f"环境验证失败,缺失: {', '.join(missing)}"
        self.logger.error(error_msg)
        raise DependencyError(error_msg)

    # Both attributes are guaranteed present by _validate_configuration,
    # which __init__ runs before this step.
    for directory in (system_config.MODEL_CACHE_DIR, system_config.LOG_DIR):
        Path(directory).mkdir(parents=True, exist_ok=True)

    self.logger.info("✅ 环境验证通过")
def _log_environment_status(self):
    """Log a one-line summary of OS, CPU, memory and disk figures.

    Sizes are reported in GiB rounded to one decimal. Any failure while
    collecting the data is logged and counted instead of being raised.
    """
    def to_gib(byte_count):
        return round(byte_count / (1024 ** 3), 1)

    try:
        os_name = platform.system()
        os_version = platform.version()
        cpu_name = platform.processor()
        physical_cores = psutil.cpu_count(logical=False)
        mem_total = to_gib(psutil.virtual_memory().total)
        mem_used = to_gib(psutil.virtual_memory().used)
        disk_total = to_gib(psutil.disk_usage('/').total)
        disk_used = to_gib(psutil.disk_usage('/').used)

        summary = (
            f"📊 系统状态: OS={os_name} {os_version}, "
            f"CPU={cpu_name} ({physical_cores}核), "
            f"内存={mem_used}/{mem_total}GB, "
            f"磁盘={disk_used}/{disk_total}GB"
        )
        self.logger.info(summary)
    except Exception as e:
        self.logger.error(f"环境状态获取失败: {e}")
        self.metrics.record_error('environment_status')
def _initialize_core_components(self):
    """Set up the pieces that do not depend on any subsystem.

    Logs the host status, creates the circuit breakers, then schedules the
    heartbeat and recovery tasks with intervals read from system_config.
    """
    self._log_environment_status()
    self._initialize_circuit_breakers()

    # (task name, callable, config key, default interval in seconds)
    core_tasks = (
        ("子系统心跳检查", self._check_subsystem_heartbeats, 'HEARTBEAT_INTERVAL', 60.0),
        ("子系统恢复", self._recover_failed_subsystems, 'RECOVERY_INTERVAL', 300.0),
    )
    for task_name, task_func, config_key, fallback in core_tasks:
        self.subsystem_registry.register_task(
            task_name,
            task_func,
            interval=system_config.get(config_key, fallback),
        )
def _initialize_circuit_breakers(self):
    """Create one CircuitBreaker per built-in subsystem name.

    Threshold and timeout are read from system_config for every breaker,
    falling back to 5 failures and 300 seconds.
    """
    protected = (
        '健康系统', '模型管理器', '记忆系统',
        '情感系统', '认知架构', '通信系统',
    )
    for subsystem in protected:
        self.circuit_breakers[subsystem] = CircuitBreaker(
            failure_threshold=system_config.get('CIRCUIT_BREAKER_THRESHOLD', 5),
            recovery_timeout=system_config.get('CIRCUIT_BREAKER_TIMEOUT', 300),
        )
        self.logger.info(f"⚡ 为 {subsystem} 初始化熔断器")
def _initialize_subsystems(self):
    """Create and register each built-in subsystem in a fixed order.

    For every subsystem: prerequisites missing from the registry are created
    first through ``_initialize_dependency``; then the subsystem itself is
    built and registered, and its ``periodic_task`` (when it has one) is
    scheduled. A failure is logged and counted, and the loop continues with
    the next subsystem instead of aborting start-up.
    """
    # (display name, factory, factory kwargs) in creation order.
    build_plan = [
        ('健康系统', self._create_health_system, {}),
        ('模型管理器', self._create_model_manager, {}),
        ('记忆系统', self._create_memory_system, {}),
        ('情感系统', self._create_affective_system, {}),
        ('认知架构', self._create_cognitive_architecture, {}),
        ('通信系统', self._create_communication_system, {}),
    ]
    # Subsystems that need others to be registered beforehand.
    prerequisites = {
        '通信系统': ['认知架构'],
        '情感系统': ['健康系统', '记忆系统'],
        '认知架构': ['记忆系统'],
    }

    for name, creator_func, kwargs in build_plan:
        try:
            absent = [
                dep for dep in prerequisites.get(name, ())
                if not self.subsystem_registry.get_subsystem(dep)
            ]
            if absent:
                self.logger.warning(f"⚠️ 子系统 {name} 缺少依赖: {', '.join(absent)}")
                # Try to bring up the missing prerequisites on the spot.
                for dep in absent:
                    self._initialize_dependency(dep)

            instance = creator_func(**kwargs)
            self.subsystem_registry.register_subsystem(name, instance)

            if hasattr(instance, 'periodic_task'):
                self.subsystem_registry.register_task(
                    f"{name}更新",
                    instance.periodic_task,
                    interval=system_config.get(f'{name}_INTERVAL', 60.0),
                )
            self.logger.info(f"✅ {name}初始化完成")
        except Exception as e:
            self.logger.error(f"❌ {name}初始化失败: {e}")
            self.metrics.record_error(f'subsystem_init_{name.lower()}')
def _initialize_dependency(self, subsystem_name: str):
"""初始化依赖子系统"""
creators = {
'健康系统': self._create_health_system,
'模型管理器': self._create_model_manager,
'记忆系统': self._create_memory_system,
'情感系统': self._create_affective_system,
'认知架构': self._create_cognitive_architecture,
'通信系统': self._create_communication_system
}
if subsystem_name in creators:
try:
instance = creators[subsystem_name]()
self.subsystem_registry.register_subsystem(subsystem_name, instance)
self.logger.info(f"✅ 依赖子系统 {subsystem_name} 初始化完成")
except Exception as e:
self.logger.error(f"❌ 依赖子系统 {subsystem_name} 初始化失败: {str(e)}")
raise
# 各子系统实现(增强功能)
def _create_health_system(self):
class HealthSystem:
def __init__(self):
self.status = "healthy"
self.metrics = {}
self.logger = logging.getLogger('HealthSystem')
def periodic_task(self):
"""更新健康状态"""
try:
# 获取系统状态
cpu_usage = psutil.cpu_percent()
mem_usage = psutil.virtual_memory().percent
disk_usage = psutil.disk_usage('/').percent
# 更新状态
self.status = "healthy" if cpu_usage < 90 and mem_usage < 90 else "warning"
self.metrics = {
"cpu_usage": cpu_usage,
"mem_usage": mem_usage,
"disk_usage": disk_usage,
"timestamp": time.time()
}
self.logger.debug(f"健康状态更新: {self.status}")
except Exception as e:
self.logger.error(f"健康系统更新失败: {str(e)}")
def record_environment_status(self, env_data):
"""记录环境状态"""
self.metrics['environment'] = env_data
def get_status(self):
return {
"status": self.status,
"metrics": self.metrics
}
return HealthSystem()
def _create_model_manager(self):
class ModelManager:
def __init__(self):
self.loaded_models = {}
self.logger = logging.getLogger('ModelManager')
def load_model(self, model_name):
"""加载模型"""
if model_name not in self.loaded_models:
# 模拟模型加载
self.logger.info(f"加载模型: {model_name}")
self.loaded_models[model_name] = {
"status": "loaded",
"load_time": time.time()
}
return True
return False
def periodic_task(self):
"""模型管理器周期性任务"""
# 检查模型状态
for model_name, model_info in list(self.loaded_models.items()):
# 模拟模型验证
if time.time() - model_info['load_time'] > 86400: # 24小时
self.logger.info(f"重新加载模型: {model_name}")
model_info['load_time'] = time.time()
def get_status(self):
return {
"loaded_models": list(self.loaded_models.keys()),
"count": len(self.loaded_models)
}
return ModelManager()
def _create_memory_system(self):
class MemorySystem:
def __init__(self):
self.memories = []
self.last_consolidation = time.time()
self.logger = logging.getLogger('MemorySystem')
def periodic_task(self):
"""巩固记忆"""
try:
# 保留最近100条记忆
if len(self.memories) > 100:
self.memories = self.memories[-100:]
self.last_consolidation = time.time()
self.logger.debug(f"记忆巩固完成,当前记忆数: {len(self.memories)}")
except Exception as e:
self.logger.error(f"记忆巩固失败: {str(e)}")
def add_memory(self, memory):
"""添加记忆"""
self.memories.append({
"content": memory,
"timestamp": time.time()
})
def get_status(self):
return {
"memory_count": len(self.memories),
"last_consolidation": self.last_consolidation
}
return MemorySystem()
def _create_affective_system(self):
class AffectiveSystem:
def __init__(self):
self.mood = "neutral"
self.energy = 100
self.logger = logging.getLogger('AffectiveSystem')
def periodic_task(self):
"""情感成长"""
try:
# 根据时间恢复能量
self.energy = min(100, self.energy + 1)
self.logger.debug(f"情感更新: 能量={self.energy}, 情绪={self.mood}")
except Exception as e:
self.logger.error(f"情感系统更新失败: {str(e)}")
def update_mood(self, interaction):
"""根据交互更新情绪"""
if "positive" in interaction:
self.mood = "happy"
elif "negative" in interaction:
self.mood = "sad"
def get_status(self):
return {
"mood": self.mood,
"energy": self.energy
}
return AffectiveSystem()
def _create_cognitive_architecture(self):
class CognitiveArchitecture:
def __init__(self):
self.current_task = None
self.task_history = []
self.logger = logging.getLogger('CognitiveArchitecture')
def start_task(self, task):
"""开始新任务"""
self.logger.info(f"开始任务: {task}")
self.current_task = task
self.task_history.append({
"task": task,
"start_time": time.time(),
"status": "in_progress"
})
def complete_task(self, result):
"""完成任务"""
if self.current_task:
for task in reversed(self.task_history):
if task["task"] == self.current_task and task["status"] == "in_progress":
task["status"] = "completed"
task["result"] = result
task["end_time"] = time.time()
self.logger.info(f"完成任务: {task['task']}")
break
self.current_task = None
def periodic_task(self):
"""认知架构周期性任务"""
# 清理过时任务
now = time.time()
self.task_history = [t for t in self.task_history
if t['status'] == 'completed' or
(now - t['start_time']) < 3600] # 保留1小时内进行中的任务
def get_status(self):
return {
"current_task": self.current_task,
"task_count": len(self.task_history),
"completed_tasks": sum(1 for t in self.task_history if t["status"] == "completed")
}
return CognitiveArchitecture()
def _create_communication_system(self):
class CommunicationSystem:
def __init__(self):
self.message_queue = []
self.processed_count = 0
self.logger = logging.getLogger('CommunicationSystem')
def process_input(self, user_input: str, user_id: str = "default") -> str:
"""处理用户输入"""
try:
# 模拟处理逻辑
response = f"已处理您的消息: '{user_input}' (用户: {user_id})"
# 记录处理
self.processed_count += 1
self.logger.info(f"处理消息: '{user_input[:30]}...' (用户: {user_id})")
return response
except Exception as e:
self.logger.error(f"消息处理失败: {str(e)}")
return "处理消息时出错"
def periodic_task(self):
"""通信系统周期性任务"""
# 清理消息队列
if len(self.message_queue) > 100:
self.message_queue = self.message_queue[-100:]
self.logger.debug("清理消息队列")
def check_heartbeat(self):
"""心跳检查"""
return True
def get_status(self):
return {
"queue_size": len(self.message_queue),
"processed_count": self.processed_count
}
return CommunicationSystem()
def process_input(self, user_input: str, user_id: str = "default") -> Dict[str, Any]:
    """Route *user_input* to the communication subsystem with a timeout.

    Fixes over the previous version:

    * The subsystem call used to run synchronously and only its finished
      result was submitted to the pool, so ``AGENT_RESPONSE_TIMEOUT`` could
      never trigger. The call itself now runs on the pool.
    * ``concurrent.futures.TimeoutError`` is caught explicitly; before
      Python 3.11 it is not the builtin ``TimeoutError``.
    * A failing call is counted once: ``breaker.call`` already records the
      failure, so the generic handler no longer records it again.

    Args:
        user_input: text received from the user.
        user_id: identifier forwarded to the communication subsystem.

    Returns:
        ``{"response": ...}`` on success or when a fallback message is
        used; ``{"error": ...}`` on timeout or failure.
    """
    # Local import: the module header only imports ThreadPoolExecutor/Future.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    comm_system = self.subsystem_registry.get_subsystem('通信系统')
    if not comm_system:
        self.logger.error("通信系统未初始化,使用回退处理")
        self.metrics.record_error('communication_system_inactive')
        return {"response": "系统正在维护中,请稍后再试"}

    breaker = self.circuit_breakers.get('通信系统')
    if breaker and breaker.is_open():
        self.logger.warning("通信系统熔断器已打开")
        self.metrics.record_error('communication_circuit_open')
        return {"response": "系统繁忙,请稍后再试"}

    def process_wrapper():
        return comm_system.process_input(user_input, user_id)

    future = None
    try:
        # Run the real work on the pool so the timeout below is effective.
        if breaker:
            future = executor.submit(breaker.call, process_wrapper)
        else:
            future = executor.submit(process_wrapper)
        result = future.result(timeout=system_config.AGENT_RESPONSE_TIMEOUT)

        self.metrics.record_success('process_input')
        return {"response": result}
    except FutureTimeoutError:
        self.logger.warning("处理输入超时")
        self.metrics.record_timeout('process_input')
        if future is not None:
            # Best effort: only prevents the task from starting if it is
            # still queued; a running task cannot be interrupted.
            future.cancel()
        if breaker:
            breaker.record_failure()
        return {"error": "处理超时,请重试"}
    except Exception as e:
        # Failures of the wrapped call were already recorded by
        # breaker.call; pool errors (e.g. submit after shutdown) are not
        # the subsystem's fault, so nothing is recorded here.
        self.logger.error(f"处理输入失败: {str(e)}")
        self.metrics.record_error('process_input')
        return {"error": "处理失败,请稍后再试"}
def _monitor_environment(self):
"""监控环境状态"""
try:
if self.environment and hasattr(self.environment, 'get_state'):
# 使用真实环境管理器获取状态
env_state = self.environment.get_state()
self.logger.info(
f"🌡️ 环境监控: 温度={env_state.get('temperature', '未知')}℃, "
f"湿度={env_state.get('humidity', '未知')}%, "
f"光照={env_state.get('light_level', '未知')}%"
)
# 记录到健康系统(如果可用)
health_system = self.subsystem_registry.get_subsystem('健康系统')
if health_system and hasattr(health_system, 'record_environment_status'):
health_system.record_environment_status(env_state)
else:
# 使用内置监控
cpu_usage = psutil.cpu_percent()
mem_usage = psutil.virtual_memory().percent
disk_usage = psutil.disk_usage('/').percent
self.logger.info(
f"📊 系统监控: CPU={cpu_usage}%, "
f"内存={mem_usage}%, "
f"磁盘={disk_usage}%"
)
# 记录到健康系统
health_system = self.subsystem_registry.get_subsystem('健康系统')
if health_system and hasattr(health_system, 'record_environment_status'):
health_system.record_environment_status({
"cpu_usage": cpu_usage,
"mem_usage": mem_usage,
"disk_usage": disk_usage
})
except Exception as e:
self.logger.error(f"环境监控失败: {str(e)}")
self.metrics.record_error('environment_monitoring')
def _check_subsystem_heartbeats(self):
"""检查子系统心跳"""
for name, subsystem in self.subsystem_registry.subsystems.items():
if hasattr(subsystem, 'check_heartbeat'):
try:
if not subsystem.check_heartbeat():
self.logger.warning(f"⚠️ 子系统 {name} 心跳检测失败")
self._handle_subsystem_error(name)
else:
self.logger.debug(f"✅ 子系统 {name} 心跳正常")
except Exception as e:
self.logger.error(f"子系统 {name} 心跳检查异常: {str(e)}")
self._handle_subsystem_error(name)
self.metrics.record_error(f'heartbeat_{name.lower()}')
def _handle_subsystem_error(self, name: str):
"""处理子系统错误"""
breaker = self.circuit_breakers.get(name)
if breaker:
breaker.record_failure()
if breaker.is_open():
self.logger.critical(f"🚨 子系统 {name} 因连续错误被熔断!")
self.metrics.record_event('circuit_breaker', name)
def _recover_failed_subsystems(self):
"""尝试恢复失败的子系统"""
for name, breaker in self.circuit_breakers.items():
if breaker.is_open() and breaker.should_try_recovery():
self.logger.info(f"🔄 尝试恢复子系统: {name}")
try:
# 尝试重新初始化子系统
self._reinitialize_subsystem(name)
breaker.record_success()
self.logger.info(f"✅ 子系统 {name} 恢复成功")
self.metrics.record_event('subsystem_recovered', name)
except Exception as e:
self.logger.error(f"子系统 {name} 恢复失败: {str(e)}")
breaker.record_failure()
self.metrics.record_error(f'recovery_{name.lower()}')
def _reinitialize_subsystem(self, name: str):
"""重新初始化子系统"""
creators = {
'健康系统': self._create_health_system,
'模型管理器': self._create_model_manager,
'记忆系统': self._create_memory_system,
'情感系统': self._create_affective_system,
'认知架构': self._create_cognitive_architecture,
'通信系统': self._create_communication_system
}
if name in creators:
instance = creators[name]()
self.subsystem_registry.register_subsystem(name, instance)
else:
raise SubsystemFailure(f"未知子系统: {name}")
def get_status(self) -> Dict[str, Any]:
    """Assemble a status report for the agent.

    Returns:
        A dict with the uptime in seconds, the running flag, the metrics
        snapshot, the status of every subsystem that offers ``get_status``
        and the status of every circuit breaker.
    """
    report = {
        "uptime": time.time() - self._initialization_time,
        "running": self._running,
        "metrics": self.metrics.get_metrics(),
        "subsystems": {},
    }
    report['subsystems'].update(
        (name, subsystem.get_status())
        for name, subsystem in self.subsystem_registry.subsystems.items()
        if hasattr(subsystem, 'get_status')
    )
    report['circuit_breakers'] = {
        name: breaker.get_status()
        for name, breaker in self.circuit_breakers.items()
    }
    return report
def shutdown(self):
    """Stop the background loop and the thread pool.

    Clears the running flag, shuts the module-level executor down without
    waiting for queued work, then waits up to 5 seconds for the background
    thread and warns when it is still alive afterwards.
    """
    self.logger.info("🛑 正在关闭智能体...")
    self._running = False

    # NOTE(review): `executor` is module-level, so this also stops the pool
    # for any other agent instance in the process — confirm that is intended.
    executor.shutdown(wait=False)

    worker = self._background_thread
    if worker and worker.is_alive():
        worker.join(timeout=5.0)
        if worker.is_alive():
            self.logger.warning("后台线程未正常退出")

    self.logger.info("✅ 智能体已关闭")
```
### 关键重构点
#### 1. 熔断器模式实现
```python
# core/circuit_breaker.py
import time
class CircuitBreaker:
    """Counts consecutive failures and blocks calls once a threshold is hit.

    States: "closed" (calls pass), "open" (calls are rejected) and
    "half-open" (one trial call is allowed after ``recovery_timeout``).

    Fix over the previous version: a successful call in the closed state
    now clears the failure counter. Before, failures accumulated across
    any number of successes, so sporadic unrelated errors eventually
    tripped the breaker.
    """

    def __init__(self, failure_threshold=5, recovery_timeout=300):
        """
        Args:
            failure_threshold: consecutive failures that open the breaker.
            recovery_timeout: seconds after the last failure before a
                trial call / recovery attempt is allowed.
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.failure_count = 0
        self.last_failure_time = 0
        self.state = "closed"  # closed, open, half-open

    def call(self, func):
        """Run ``func()`` under breaker protection and return its result.

        Raises:
            CircuitOpenException: when the breaker is open and the recovery
                timeout has not elapsed yet.
            Exception: whatever ``func`` raises, after the failure has been
                recorded.
        """
        if self.state == "open":
            if time.time() - self.last_failure_time > self.recovery_timeout:
                self.state = "half-open"
            else:
                raise CircuitOpenException("Circuit breaker is open")
        try:
            result = func()
        except Exception:
            self.record_failure()
            raise
        # Any success ends the failure streak (and closes a half-open breaker).
        self.reset()
        return result

    def record_failure(self):
        """Count one failure; open the breaker at the threshold.

        A failure while half-open re-opens the breaker immediately.
        """
        self.failure_count += 1
        self.last_failure_time = time.time()
        if self.state == "half-open" or self.failure_count >= self.failure_threshold:
            self.state = "open"

    def record_success(self):
        """Record a success reported from outside ``call``; closes the breaker."""
        self.reset()

    def reset(self):
        """Clear the failure counter and close the breaker."""
        self.failure_count = 0
        self.state = "closed"

    def is_open(self):
        """Return True while the breaker is in the "open" state."""
        return self.state == "open"

    def should_try_recovery(self):
        """Return True when open and the recovery timeout has elapsed."""
        return (self.state == "open" and
                time.time() - self.last_failure_time > self.recovery_timeout)

    def get_status(self):
        """Return the state, failure count and last failure timestamp."""
        return {
            "state": self.state,
            "failure_count": self.failure_count,
            "last_failure_time": self.last_failure_time
        }
class CircuitOpenException(Exception):
    """Raised by ``CircuitBreaker.call`` when the breaker rejects a call."""
```
- **熔断器状态管理**:closed(正常)、open(熔断)、half-open(尝试恢复)
- **自动恢复机制**:超时后自动尝试恢复
- **状态查询接口**:提供熔断器状态信息
#### 2. 子系统注册表
```python
# core/subsystem_registry.py
import time
import logging
class SubsystemRegistry:
    """Holds subsystem instances and a simple interval-based task list.

    Fixes over the previous version:

    * ``last_run`` is stamped before a task runs, so a task that raises
      waits for its interval like any other. It used to be retried on
      every scheduler pass, flooding the log.
    * Registering a task under an existing name replaces the old entry
      instead of adding a duplicate (relevant when a subsystem is
      re-created or a caller registers twice).
    """

    def __init__(self):
        self.subsystems = {}
        # Each entry: {'name', 'func', 'interval', 'last_run'}.
        self.tasks = []
        self.logger = logging.getLogger('SubsystemRegistry')

    def register_subsystem(self, name: str, subsystem):
        """Store *subsystem* under *name*, replacing any previous instance."""
        self.subsystems[name] = subsystem
        self.logger.info(f"注册子系统: {name}")

    def get_subsystem(self, name: str):
        """Return the subsystem registered as *name*, or None."""
        return self.subsystems.get(name)

    def register_task(self, name: str, task_func: callable, interval: float = 60.0):
        """Schedule *task_func* to run every *interval* seconds.

        A task already registered under *name* is replaced. New tasks are
        due immediately on the next ``run_periodic_tasks`` pass.
        """
        entry = {
            'name': name,
            'func': task_func,
            'interval': interval,
            'last_run': 0
        }
        for index, existing in enumerate(self.tasks):
            if existing['name'] == name:
                self.tasks[index] = entry
                break
        else:
            self.tasks.append(entry)
        self.logger.info(f"注册任务: {name} (间隔: {interval}秒)")

    def run_periodic_tasks(self):
        """Run every task whose interval has elapsed.

        A failing task is logged and does not stop the remaining tasks.
        """
        # Iterate over a snapshot so tasks registered meanwhile are picked
        # up on the next pass rather than mid-iteration.
        for task in tuple(self.tasks):
            now = time.time()
            if now - task['last_run'] < task['interval']:
                continue
            # Stamp first: a failure must not make the task due again at once.
            task['last_run'] = now
            try:
                task['func']()
                self.logger.debug(f"执行任务: {task['name']}")
            except Exception as e:
                self.logger.error(f"任务 {task['name']} 执行失败: {str(e)}")
```
- **统一管理子系统**:集中存储所有子系统实例
- **任务调度**:自动执行注册的周期性任务
- **错误隔离**:任务执行失败不影响其他任务
#### 3. 配置验证增强
```python
def _validate_configuration(self):
    """Fail fast when a mandatory attribute is absent from system_config.

    Only the presence of each attribute is checked, not its value.

    Raises:
        ConfigurationError: listing every missing attribute name.
    """
    mandatory = (
        'LOG_DIR', 'CONFIG_DIR', 'MODEL_CACHE_DIR',
        'MAX_WORKERS', 'AGENT_RESPONSE_TIMEOUT',
    )
    absent = [key for key in mandatory if not hasattr(system_config, key)]
    if absent:
        raise ConfigurationError(f"缺失关键配置项: {', '.join(absent)}")
```
- **启动时验证**:确保必需配置项存在
- **明确错误信息**:指出具体缺失的配置项
- **提前失败**:在初始化早期发现问题
#### 4. 环境监控集成
```python
def _monitor_environment(self):
"""监控环境状态"""
try:
if self.environment and hasattr(self.environment, 'get_state'):
# 使用真实环境管理器获取状态
env_state = self.environment.get_state()
self.logger.info(
f"🌡️ 环境监控: 温度={env_state.get('temperature', '未知')}℃, "
f"湿度={env_state.get('humidity', '未知')}%, "
f"光照={env_state.get('light_level', '未知')}%"
)
# 记录到健康系统
health_system = self.subsystem_registry.get_subsystem('健康系统')
if health_system and hasattr(health_system, 'record_environment_status'):
health_system.record_environment_status(env_state)
```
- **统一接口**:支持真实环境管理器或内置监控
- **自动记录**:将环境状态传递给健康系统
- **错误处理**:捕获并记录监控过程中的异常
#### 5. 智能体状态报告
```python
def get_status(self) -> Dict[str, Any]:
    """Assemble a status report for the agent.

    Returns:
        A dict with the uptime in seconds, the running flag, the metrics
        snapshot, the status of every subsystem that offers ``get_status``
        and the status of every circuit breaker.
    """
    report = {
        "uptime": time.time() - self._initialization_time,
        "running": self._running,
        "metrics": self.metrics.get_metrics(),
        "subsystems": {},
    }
    report['subsystems'].update(
        (name, subsystem.get_status())
        for name, subsystem in self.subsystem_registry.subsystems.items()
        if hasattr(subsystem, 'get_status')
    )
    report['circuit_breakers'] = {
        name: breaker.get_status()
        for name, breaker in self.circuit_breakers.items()
    }
    return report
```
- **全面状态信息**:包含运行时间、状态、指标
- **子系统状态**:收集所有子系统的状态报告
- **熔断器状态**:显示每个熔断器的当前状态
### 优势与改进
1. **模块化设计**:
- 子系统独立注册管理
- 任务调度与执行分离
- 熔断器独立实现
2. **容错能力增强**:
- 熔断器防止级联故障
- 任务错误隔离
- 自动恢复机制
3. **可维护性提升**:
- 清晰的子系统接口
- 统一的注册管理
- 集中式配置验证
4. **性能优化**:
- 任务调度优化
- 异步处理用户输入
- 轻量级状态报告
5. **可观测性**:
- 详细的系统状态报告
- 熔断器状态可视化
- 全面的指标收集
### 使用示例
```python
import json

# Create the agent (runs the full initialization sequence).
agent = AutonomousAgent()
try:
    # Attach the environment manager; `environment_manager` must be
    # provided by the surrounding application.
    agent.set_environment(environment_manager)
    # Start the background task thread.
    agent.start()
    # Handle one user message.
    response = agent.process_input("你好,智能体!", "user123")
    print(response)
    # Print the status report. ensure_ascii=False keeps the Chinese
    # subsystem names readable instead of \uXXXX escapes.
    status = agent.get_status()
    print(json.dumps(status, indent=2, ensure_ascii=False))
finally:
    # Always stop the background thread and the pool, even on errors.
    agent.shutdown()
```
### 测试建议
1. **熔断器测试**:
```python
def test_circuit_breaker():
    """The breaker opens after repeated failures and closes on success.

    A short ``recovery_timeout`` is passed explicitly: with the default of
    300 seconds the original ``sleep(recovery_timeout + 1)`` blocked the
    test for about five minutes.
    """
    breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=0.05)

    # Three failures in a row must open the breaker.
    for _ in range(3):
        try:
            breaker.call(lambda: 1 / 0)
        except ZeroDivisionError:
            pass
    assert breaker.is_open()

    # Once the timeout has elapsed a recovery attempt is allowed.
    time.sleep(breaker.recovery_timeout + 0.05)
    assert breaker.should_try_recovery()

    # A recorded success closes the breaker again.
    breaker.record_success()
    assert breaker.state == "closed"
```
2. **子系统注册测试**:
```python
def test_subsystem_registry():
    """Subsystems and tasks can be registered and looked up."""
    class MockSubsystem:
        """Minimal stand-in; the original snippet used this name undefined."""

    registry = SubsystemRegistry()

    # Register a subsystem and read it back.
    subsystem = MockSubsystem()
    registry.register_subsystem("MockSystem", subsystem)
    assert registry.get_subsystem("MockSystem") is subsystem
    assert registry.get_subsystem("Nonexistent") is None

    # Register a periodic task.
    def mock_task():
        pass

    registry.register_task("MockTask", mock_task, interval=5.0)
    assert len(registry.tasks) == 1
```
3. **环境监控集成测试**:
```python
def test_environment_monitoring():
    """An environment snapshot reaches the health subsystem's metrics."""
    class MockEnvManager:
        """Environment manager stub returning a fixed state."""

        def get_state(self):
            return {"temperature": 25.0}

    agent = AutonomousAgent()
    agent.set_environment(MockEnvManager())

    # Run one monitoring pass by hand.
    agent._monitor_environment()

    # The health subsystem must now hold the environment report.
    registry = agent.subsystem_registry
    health_system = registry.get_subsystem('健康系统')
    assert 'environment' in health_system.metrics
```