稍等一下,好像有一点不对。我需要先跟你交流讨论,先别写代码,可以吗?就是它有学习系统,还有一个核心;我应该是跟它的核心对话,而不是直接跟模型对话,你懂我的意思吗?你看看下面的核心代码,感受一下我的意思:“
# E:\AI_System\agent\autonomous_agent.py
import os
import sys
import time
import logging
import importlib
import traceback
import psutil
import platform
import threading
import json
from pathlib import Path
from dotenv import load_dotenv
from typing import Dict, Any, Optional, List, Callable
from concurrent.futures import ThreadPoolExecutor
from ..core.config import system_config
# 使用绝对导入 - 确保路径正确
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from core.config import system_config
from core.exceptions import DependencyError, SubsystemFailure
from core.dependency_manager import DependencyManager
from core.metrics import PerformanceMetrics, MetricsCollector
# Module-wide thread pool (4 workers). AutonomousAgent.process_input submits
# work to it and AutonomousAgent.shutdown() shuts it down, so it is shared by
# every agent instance created in this process.
executor = ThreadPoolExecutor(max_workers=4)
class AutonomousAgent:
    """Core of the autonomous agent: builds and supervises all subsystems.

    Responsibilities visible in this class:

    * verify the runtime environment and create the fallback environment probe;
    * create the six subsystems and track a per-subsystem circuit-breaker
      record in ``self.subsystem_status``;
    * route user input to the communication subsystem (``process_input``);
    * run periodic maintenance on a daemon thread (``run_periodic_tasks``);
    * persist / restore the circuit-breaker state as JSON.

    Each subsystem is stored on the instance under the English attribute name
    listed in ``_SUBSYSTEMS`` (e.g. ``self.communication_system``), which is the
    name the rest of this class reads.
    """

    # (display name, attribute name) in initialization order. The attribute
    # name is also the suffix of the ``_create_fallback_<attr>`` factory.
    _SUBSYSTEMS = (
        ('健康系统', 'health_system'),
        ('模型管理器', 'model_manager'),
        ('记忆系统', 'memory_system'),
        ('情感系统', 'affective_system'),
        ('认知架构', 'cognitive_architecture'),
        ('通信系统', 'communication_system'),
    )
    _SUBSYSTEM_ATTRS = dict(_SUBSYSTEMS)

    # Errors recorded against a subsystem before its circuit breaker opens
    # (the original code marked this value as a temporary threshold).
    _CIRCUIT_BREAKER_THRESHOLD = 5
    # Seconds to wait after the last error before a recovery is attempted.
    _RECOVERY_COOLDOWN_SECONDS = 300
    # Minimum seconds between two recovery attempts for the same subsystem.
    _RECOVERY_RETRY_INTERVAL_SECONDS = 600

    def __init__(self):
        """Initialize the agent, its subsystems and the background thread.

        Raises:
            RuntimeError: if any initialization step fails; the original
                exception is chained as ``__cause__``.
        """
        self.logger = self._setup_logger()
        self.logger.info("🔁 初始化自主智能体核心模块...")
        self._running = False  # background loop run flag
        self._background_thread = None  # background worker thread
        # Set by shutdown() so the background loop wakes up immediately
        # instead of finishing a 10-30 second sleep.
        self._stop_event = threading.Event()
        # Initialization bookkeeping
        self.initialization_steps = []
        self._last_env_check = 0
        self._initialization_time = time.time()
        self.subsystem_status = {}  # per-subsystem circuit-breaker records
        self.metrics = MetricsCollector()  # performance monitoring
        self._status_lock = threading.Lock()  # guards subsystem_status
        self.dependency_manager = DependencyManager()
        try:
            self._record_step("加载环境变量")
            load_dotenv()
            self._record_step("验证环境")
            self.verify_environment()
            self._record_step("初始化核心组件")
            self._initialize_core_components()
            self._record_step("初始化子系统")
            self._initialize_subsystems()
            self.logger.info(f"✅ 自主智能体初始化完成 (耗时: {time.time() - self._initialization_time:.2f}秒)")
            self.logger.info(f"初始化步骤: {', '.join(self.initialization_steps)}")
            # Start the periodic-task thread only after everything is built.
            self._start_background_tasks()
        except Exception as e:
            self.logger.exception(f"❌ 智能体初始化失败: {str(e)}")
            self.logger.error(f"堆栈跟踪:\n{traceback.format_exc()}")
            raise RuntimeError(f"智能体初始化失败: {str(e)}") from e

    def _subsystem_attr(self, name: str) -> str:
        """Return the instance attribute name used to store subsystem *name*.

        Names not listed in ``_SUBSYSTEMS`` (for example entries restored from
        an older status cache) fall back to the lower-cased name with spaces
        replaced by underscores.
        """
        return self._SUBSYSTEM_ATTRS.get(name, name.lower().replace(' ', '_'))

    def _start_background_tasks(self):
        """Start the daemon thread that runs the periodic tasks (no-op if running)."""
        if self._running:
            self.logger.warning("后台任务已在运行")
            return
        self._running = True
        self._stop_event.clear()
        self._background_thread = threading.Thread(
            target=self._background_task_loop,
            daemon=True,
            name="AutonomousAgentBackgroundTasks"
        )
        self._background_thread.start()
        self.logger.info("✅ 后台任务线程已启动")

    def _background_task_loop(self):
        """Run ``run_periodic_tasks`` repeatedly until the agent stops.

        Targets a 10-second period (with a 0.1 s minimum pause) and backs off
        for 30 seconds after a failed iteration. The pause is an Event wait so
        that ``shutdown()`` can interrupt it.
        """
        while self._running:
            try:
                start_time = time.time()
                self.run_periodic_tasks()
                task_time = time.time() - start_time
                pause = max(0.1, 10 - task_time)
            except Exception as e:
                self.logger.error(f"后台任务错误: {str(e)}")
                self.metrics.record_error('background_task')
                pause = 30
            self._stop_event.wait(pause)

    def _record_step(self, step_name: str):
        """Append *step_name* to the initialization log and report it."""
        self.initialization_steps.append(step_name)
        self.logger.info(f"⏳ 步骤 {len(self.initialization_steps)}: {step_name}")

    def verify_environment(self):
        """Check required modules, the config file and the working directories.

        Missing model-cache / log directories are created. A missing config
        file is only reported as a warning.

        Raises:
            DependencyError: if any required module cannot be imported.
        """
        missing = []
        warning_messages = []
        required_modules = [
            'os', 'sys', 'logging', 'dotenv', 'flask', 'werkzeug',
            'numpy', 'transformers', 'torch', 'psutil'
        ]
        for mod in required_modules:
            try:
                importlib.import_module(mod)
            except ImportError:
                missing.append(mod)
        # Read the path with getattr so a config object without CONFIG_PATH
        # is reported instead of raising AttributeError while formatting.
        config_path = getattr(system_config, 'CONFIG_PATH', None)
        if not config_path or not os.path.exists(config_path):
            self.logger.error(f"❌ 配置文件缺失: {config_path}")
            warning_messages.append(f"配置文件缺失: {config_path}")
        # Model cache directory: create when absent.
        model_dir = Path(system_config.MODEL_CACHE_DIR)
        if not model_dir.exists():
            model_dir.mkdir(parents=True, exist_ok=True)
            self.logger.warning(f"⚠️ 创建模型缓存目录: {model_dir}")
        # Log directory: create when absent.
        log_dir = Path(system_config.LOG_DIR)
        if not log_dir.exists():
            log_dir.mkdir(parents=True, exist_ok=True)
            self.logger.warning(f"⚠️ 创建日志目录: {log_dir}")
        for message in warning_messages:
            self.logger.warning(message)
        if missing:
            error_msg = f"环境验证失败,缺失: {', '.join(missing)}"
            self.logger.error(error_msg)
            self.dependency_manager.record_missing_dependencies(missing)
            raise DependencyError(error_msg)
        self.logger.info("✅ 环境验证通过")

    def _setup_logger(self) -> logging.Logger:
        """Return the 'AutonomousAgent' logger with console and file handlers.

        ``logging.getLogger`` returns the same object on every call, so the
        handlers are attached only once; otherwise each new agent instance
        would duplicate every log line. The log directory is created first
        because this runs before ``verify_environment``.
        """
        logger = logging.getLogger('AutonomousAgent')
        logger.setLevel(system_config.LOG_LEVEL)
        logger.propagate = False
        if logger.handlers:
            return logger
        log_dir = Path(system_config.LOG_DIR)
        log_dir.mkdir(parents=True, exist_ok=True)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        console_handler = logging.StreamHandler()
        file_handler = logging.FileHandler(log_dir / 'autonomous_agent.log', encoding='utf-8')
        for handler in (console_handler, file_handler):
            handler.setLevel(system_config.LOG_LEVEL)
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def _initialize_core_components(self):
        """Create the components that do not depend on any subsystem."""
        # Project root: the parent of the directory containing this file.
        base_dir = Path(__file__).resolve().parent.parent
        self.environment = self._create_fallback_environment(base_dir)
        self.logger.info("✅ 环境接口初始化完成")
        self._log_environment_status()
        # Restore the persisted circuit-breaker state, if recent enough.
        self._load_subsystem_status()

    def _create_fallback_environment(self, base_dir: Path):
        """Build a minimal environment probe backed by psutil/platform.

        Args:
            base_dir: directory in which ``environment_status.json`` is cached.

        Returns:
            An object exposing ``get_system_info()``.
        """
        class FallbackEnvironment:
            def __init__(self, base_dir):
                self.base_dir = base_dir
                self.status_file = base_dir / 'environment_status.json'

            def get_system_info(self):
                """Return the cached status if readable, else probe and cache it."""
                # NOTE(review): a readable cache file is returned as-is,
                # whatever its age; confirm that this is intended.
                try:
                    if self.status_file.exists():
                        with open(self.status_file, 'r', encoding='utf-8') as f:
                            return json.load(f)
                except (OSError, ValueError):
                    # Unreadable or corrupt cache: fall through and re-probe.
                    pass
                memory = psutil.virtual_memory()
                disk = psutil.disk_usage('/')
                status = {
                    "os": platform.system(),
                    "os_version": platform.version(),
                    "cpu": platform.processor(),
                    "cpu_cores": psutil.cpu_count(logical=False),
                    "memory_total": round(memory.total / (1024 ** 3), 1),
                    "memory_used": round(memory.used / (1024 ** 3), 1),
                    "disk_total": round(disk.total / (1024 ** 3), 1),
                    "disk_used": round(disk.used / (1024 ** 3), 1),
                    "timestamp": time.time()
                }
                try:
                    with open(self.status_file, 'w', encoding='utf-8') as f:
                        json.dump(status, f)
                except (OSError, TypeError, ValueError):
                    # Caching is best-effort; the fresh status is still returned.
                    pass
                return status

        return FallbackEnvironment(base_dir)

    def _log_environment_status(self):
        """Log a one-line summary of OS, CPU, memory and disk."""
        try:
            env_status = self.environment.get_system_info() or {}
            self.logger.info(
                f"📊 系统状态: OS={env_status.get('os', '未知')} {env_status.get('os_version', '')}, "
                f"CPU={env_status.get('cpu', '未知')} ({env_status.get('cpu_cores', 0)}核), "
                f"内存={env_status.get('memory_used', 0)}/{env_status.get('memory_total', 0)}GB, "
                f"磁盘={env_status.get('disk_used', 0)}/{env_status.get('disk_total', 0)}GB"
            )
        except Exception as e:
            self.logger.error(f"环境状态获取失败: {str(e)}")
            self.metrics.record_error('environment_status')

    def _initialize_subsystems(self):
        """Create every subsystem in ``_SUBSYSTEMS`` order and record its status.

        A failure in one subsystem is logged and recorded as inactive; it does
        not abort the remaining subsystems. The resulting status table is
        persisted at the end.
        """
        # Declared dependencies between subsystems.
        self.dependency_manager.register_dependency('通信系统', ['认知架构'])
        self.dependency_manager.register_dependency('情感系统', ['健康系统', '记忆系统'])
        self.dependency_manager.register_dependency('认知架构', ['记忆系统'])
        for name, attr_name in self._SUBSYSTEMS:
            try:
                missing_deps = self.dependency_manager.check_dependencies(name)
                if missing_deps:
                    self.logger.warning(f"⚠️ 子系统 {name} 缺少依赖: {', '.join(missing_deps)}")
                    self.dependency_manager.install_missing_dependencies(missing_deps)
                instance = self._reinitialize_subsystem(name)
                # Store under the English attribute name that the rest of the
                # class reads (self.health_system, self.communication_system, ...).
                setattr(self, attr_name, instance)
                self.logger.info(f"✅ {name}初始化完成")
                with self._status_lock:
                    self.subsystem_status[name] = {
                        'active': True,
                        'error_count': 0,
                        'last_active': time.time(),
                        'last_recovery_attempt': 0
                    }
            except Exception as e:
                self.logger.error(f"❌ {name}初始化失败: {str(e)}")
                with self._status_lock:
                    self.subsystem_status[name] = {
                        'active': False,
                        'error': str(e),
                        'error_count': 1,
                        'last_error': time.time()
                    }
                self.metrics.record_error(f'subsystem_init_{name.lower()}')
        self._save_subsystem_status()

    # The fallback implementations of the individual subsystems (the
    # `_create_fallback_*` factories) are unchanged and omitted from this listing.

    def process_input(self, user_input: str, user_id: str = "default") -> Dict[str, Any]:
        """Handle one user message through the communication subsystem.

        Args:
            user_input: raw text from the user.
            user_id: identifier forwarded to the communication subsystem.

        Returns:
            The communication subsystem's response, or a dict with a
            ``"response"`` / ``"error"`` message when the subsystem is
            inactive, times out (10 s) or fails.
        """
        # Before Python 3.11 Future.result raises concurrent.futures.TimeoutError,
        # which is a different class from the builtin TimeoutError.
        from concurrent.futures import TimeoutError as FutureTimeoutError

        if not self._is_subsystem_active('通信系统'):
            self.logger.error("通信系统未激活,使用回退处理")
            self.metrics.record_error('communication_system_inactive')
            return {"response": "系统正在维护中,请稍后再试"}
        try:
            with PerformanceMetrics() as pm:
                future = executor.submit(
                    self.communication_system.process_input,
                    user_input,
                    user_id
                )
                response = future.result(timeout=10)  # 10-second timeout
            # presumably pm.duration is final once the context exits -- TODO confirm
            self.metrics.record_latency('process_input', pm.duration)
            self.metrics.record_success('process_input')
            self.logger.info(f"📥 处理输入: '{user_input[:30]}...' → 耗时: {pm.duration:.2f}秒")
            return response
        except (FutureTimeoutError, TimeoutError):
            self.logger.warning("处理输入超时")
            self.metrics.record_timeout('process_input')
            return {"error": "处理超时,请重试"}
        except Exception as e:
            tripped = False
            with self._status_lock:
                comm_status = self.subsystem_status.get('通信系统', {})
                comm_status['error_count'] = comm_status.get('error_count', 0) + 1
                comm_status['last_error'] = time.time()
                if comm_status['error_count'] >= self._CIRCUIT_BREAKER_THRESHOLD:
                    comm_status['active'] = False
                    tripped = True
            if tripped:
                self.logger.critical("🚨 通信系统因连续错误被熔断!")
                self.metrics.record_event('circuit_breaker', '通信系统')
            self.logger.error(f"处理输入失败: {str(e)}")
            self.metrics.record_error('process_input')
            return {"error": "处理失败,请稍后再试"}

    def run_periodic_tasks(self):
        """Run one round of maintenance tasks.

        Each task is paired with the subsystem that must be active for it to
        run; ``None`` means the task always runs. A failing task is logged,
        counted against its subsystem and does not stop the remaining tasks.
        """
        task_start = time.time()
        tasks_executed = 0
        tasks_failed = 0
        # (task name, gating subsystem or None, callable). Lambdas defer the
        # attribute lookup so a missing subsystem fails inside the try below.
        tasks = [
            ('健康系统更新', '健康系统', lambda: self.health_system.update()),
            ('情感系统更新', '情感系统', lambda: self.affective_system.grow()),
            ('记忆系统维护', '记忆系统', lambda: self.memory_system.consolidate_memories()),
            ('环境监控', None, self._monitor_environment),
            ('子系统心跳检查', None, self._check_subsystem_heartbeats),
            ('子系统恢复', None, self._recover_failed_subsystems)
        ]
        for name, subsystem_name, task_func in tasks:
            try:
                if subsystem_name is None or self._is_subsystem_active(subsystem_name):
                    task_func()
                    tasks_executed += 1
            except Exception as e:
                tasks_failed += 1
                self.logger.error(f"{name}失败: {str(e)}", exc_info=True)
                if subsystem_name is not None:
                    self._handle_subsystem_error(subsystem_name, e)
                self.metrics.record_error(f'periodic_{name.lower()}')
        if tasks_executed > 0:
            task_time = time.time() - task_start
            self.logger.debug(f"⏱️ 执行 {tasks_executed} 项周期性任务 ({tasks_failed}失败), 耗时: {task_time:.3f}秒")
            self.metrics.record_latency('periodic_tasks', task_time)
            self.metrics.record_value('periodic_tasks_count', tasks_executed)
            self.metrics.record_value('periodic_tasks_failed', tasks_failed)

    def _is_subsystem_active(self, name: str) -> bool:
        """Return True if subsystem *name* is known and its breaker is closed."""
        with self._status_lock:
            status = self.subsystem_status.get(name, {})
            return status.get('active', False)

    def _handle_subsystem_error(self, name: str, error: Exception):
        """Count one error against *name* and open its breaker at the threshold.

        Unknown subsystem names are ignored. Must not be called while
        ``_status_lock`` is held: the lock is not reentrant.
        """
        tripped = False
        with self._status_lock:
            status = self.subsystem_status.get(name)
            if status is None:
                return
            status['error_count'] = status.get('error_count', 0) + 1
            status['last_error'] = time.time()
            if status['error_count'] >= self._CIRCUIT_BREAKER_THRESHOLD:
                status['active'] = False
                tripped = True
        if tripped:
            self.logger.critical(f"🚨 子系统 {name} 因连续错误被熔断!")
            self.metrics.record_event('circuit_breaker', name)

    def _check_subsystem_heartbeats(self):
        """Call ``check_heartbeat()`` on every active subsystem that provides it.

        A failed or raising heartbeat is counted as a subsystem error; a
        successful one refreshes ``last_active``.
        """
        with self._status_lock:
            names = list(self.subsystem_status)
        for name in names:
            if not self._is_subsystem_active(name):
                continue  # breaker already open
            subsystem = getattr(self, self._subsystem_attr(name), None)
            if not (subsystem and hasattr(subsystem, 'check_heartbeat')):
                continue
            try:
                if not subsystem.check_heartbeat():
                    self.logger.warning(f"⚠️ 子系统 {name} 心跳检测失败")
                    self._handle_subsystem_error(name, RuntimeError("心跳检测失败"))
                else:
                    with self._status_lock:
                        status = self.subsystem_status.get(name)
                        if status is not None:
                            status['last_active'] = time.time()
            except Exception as e:
                self.logger.error(f"子系统 {name} 心跳检查异常: {str(e)}")
                self._handle_subsystem_error(name, e)
                self.metrics.record_error(f'heartbeat_{name.lower()}')

    def _recover_failed_subsystems(self):
        """Try to re-create subsystems whose breaker is open.

        A subsystem is retried only when its last error is older than the
        cooldown and the previous attempt is older than the retry interval.
        The status lock is held only while reading or writing the record,
        never while the subsystem is being re-created.
        """
        with self._status_lock:
            names = list(self.subsystem_status)
        for name in names:
            now = time.time()
            with self._status_lock:
                status = self.subsystem_status.get(name, {})
                if status.get('active', False):
                    continue  # healthy
                if now - status.get('last_error', 0) < self._RECOVERY_COOLDOWN_SECONDS:
                    continue
                if now - status.get('last_recovery_attempt', 0) < self._RECOVERY_RETRY_INTERVAL_SECONDS:
                    continue
                status['last_recovery_attempt'] = now
            self.logger.info(f"🔄 尝试恢复子系统: {name}")
            try:
                subsystem = self._reinitialize_subsystem(name)
                setattr(self, self._subsystem_attr(name), subsystem)
                with self._status_lock:
                    status['active'] = True
                    status['error_count'] = 0
                    status['last_error'] = 0
                self.logger.info(f"✅ 子系统 {name} 恢复成功")
                self.metrics.record_event('subsystem_recovered', name)
            except Exception as e:
                with self._status_lock:
                    status['active'] = False
                    status['error_count'] = status.get('error_count', 0) + 1
                    status['last_error'] = time.time()
                self.logger.error(f"子系统 {name} 恢复失败: {str(e)}")
                self.metrics.record_error(f'recovery_{name.lower()}')

    def _reinitialize_subsystem(self, name: str) -> Any:
        """Create a fresh instance of subsystem *name* via its fallback factory.

        Raises:
            SubsystemFailure: if *name* is not one of the known subsystems.
        """
        attr_name = self._SUBSYSTEM_ATTRS.get(name)
        if attr_name is None:
            raise SubsystemFailure(f"未知子系统: {name}")
        return getattr(self, f'_create_fallback_{attr_name}')()

    def _monitor_environment(self):
        """Sample CPU, memory and disk usage; log and record the values."""
        try:
            self.logger.info("🔍 开始环境监控...")
            env_status = self.environment.get_system_info() or {}
            env_status['cpu_usage'] = psutil.cpu_percent()
            env_status['memory_usage'] = psutil.virtual_memory().percent
            env_status['disk_usage'] = psutil.disk_usage('/').percent
            self.logger.info(
                f"📊 环境监控: CPU={env_status['cpu_usage']}%, "
                f"内存={env_status['memory_usage']}%, "
                f"磁盘={env_status['disk_usage']}%"
            )
            # Forward the sample to the health subsystem when it exists.
            if hasattr(self, 'health_system'):
                self.health_system.record_environment_status(env_status)
            self.metrics.record_value('cpu_usage', env_status['cpu_usage'])
            self.metrics.record_value('memory_usage', env_status['memory_usage'])
            self.metrics.record_value('disk_usage', env_status['disk_usage'])
        except Exception as e:
            self.logger.error(f"环境监控失败: {str(e)}", exc_info=True)
            self.metrics.record_error('environment_monitoring')

    def _save_subsystem_status(self):
        """Write the circuit-breaker table to ``subsystem_status.json``."""
        status_file = Path(system_config.CONFIG_DIR) / 'subsystem_status.json'
        try:
            # Serialize under the lock for a consistent snapshot, then do the
            # file I/O without holding it.
            with self._status_lock:
                payload = json.dumps(
                    {'timestamp': time.time(), 'status': self.subsystem_status},
                    indent=2
                )
            with open(status_file, 'w', encoding='utf-8') as f:
                f.write(payload)
        except Exception as e:
            self.logger.error(f"保存子系统状态失败: {str(e)}")

    def _load_subsystem_status(self):
        """Restore the circuit-breaker table if the cache is under 24 hours old."""
        status_file = Path(system_config.CONFIG_DIR) / 'subsystem_status.json'
        if not status_file.exists():
            return
        try:
            with open(status_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if time.time() - data.get('timestamp', 0) < 86400:
                cached = data.get('status', {})
                if isinstance(cached, dict):
                    with self._status_lock:
                        self.subsystem_status = cached
                    self.logger.info("加载子系统状态缓存")
        except Exception as e:
            self.logger.error(f"加载子系统状态失败: {str(e)}")

    def get_status(self) -> Dict[str, Any]:
        """Return a status report for the agent.

        Returns:
            A dict with ``uptime``, ``subsystems`` (name -> active flag),
            ``circuit_breaker`` (name -> active/error_count/last_error),
            ``metrics``, ``environment`` and, for each subsystem that offers
            ``get_status()``, an entry keyed by the subsystem's display name.
        """
        # Snapshot the breaker table under the lock; everything else is
        # gathered afterwards so no foreign code runs while the lock is held.
        with self._status_lock:
            subsystems = {
                name: info.get('active', False)
                for name, info in self.subsystem_status.items()
            }
            circuit_breaker = {
                name: {
                    "active": info.get('active', False),
                    "error_count": info.get('error_count', 0),
                    "last_error": info.get('last_error', 0)
                }
                for name, info in self.subsystem_status.items()
            }
        status_data = {
            "uptime": time.time() - self._initialization_time,
            "subsystems": subsystems,
            "circuit_breaker": circuit_breaker,
            "metrics": self.metrics.get_metrics(),
            "environment": self.environment.get_system_info() if hasattr(self, 'environment') else {}
        }
        for name in ['健康系统', '情感系统', '记忆系统', '模型管理器', '认知架构', '通信系统']:
            subsystem = getattr(self, self._subsystem_attr(name), None)
            if subsystem is not None and hasattr(subsystem, 'get_status'):
                status_data[name] = subsystem.get_status()
        return status_data

    def shutdown(self):
        """Stop the background thread, shut the pool down and persist state."""
        self.logger.info("🛑 正在关闭智能体...")
        self._running = False
        self._stop_event.set()  # wake the background loop immediately
        # NOTE: this shuts down the module-level pool shared by all instances.
        executor.shutdown(wait=False)
        self._save_subsystem_status()
        if self._background_thread and self._background_thread.is_alive():
            self._background_thread.join(timeout=5.0)
            if self._background_thread.is_alive():
                self.logger.warning("后台线程未正常退出")
        self.logger.info("✅ 智能体已关闭")
”
最新发布