Daemon Monitor Windows Services Status


# 1. 安装依赖
pip install prometheus_client

# 2. 创建/修改 core/config.py
cat > core/config.py <<EOF
class CoreConfig:
    def __init__(self):
        self.api_timeout = 30
        self.max_threads = 8
        self.debug_mode = True
        self.log_level = "INFO"

    @property
    def database_url(self):
        return "sqlite:///ai_system.db"
EOF

# 3. 修复导入点
# 在 agent/autonomous_agent.py 和 web_ui/server.py 中:
sed -i 's/from core.config import system_config/from core.config import CoreConfig\nsystem_config = CoreConfig()/' agent/autonomous_agent.py
sed -i 's/from core.config import CoreConfig as SystemConfig/from core.config import CoreConfig\nSystemConfig = CoreConfig()/' web_ui/server.py

# 4. 重新启动系统
python -m model_server.api_server &
python -m agent.agent_core &
python -m web_ui.server &

这个放哪?是start_system.bat:“
@echo off
chcp 65001 > nul
setlocal enabledelayedexpansion

echo ==================================================
echo AI系统启动脚本 v2.0
echo 最后更新: %date% %time%
echo ==================================================

:: 设置项目根目录
set "AI_SYSTEM_PATH=%~dp0"
cd /d "%AI_SYSTEM_PATH%"

:: 检查Python是否可用
echo 检查Python环境...
python --version > nul 2>&1
if %errorlevel% neq 0 (
    echo 错误: Python未找到。请确保Python已安装并添加到PATH
    pause
    exit /b 1
)

:: 检查虚拟环境
if exist ".venv\" (
    echo 激活虚拟环境...
    call .venv\Scripts\activate
) else (
    echo 警告: 未找到虚拟环境,使用系统Python
)

:: 安装/更新依赖(如果指定了参数)
if "%1"=="--install-deps" (
    echo 安装/更新系统依赖...
    python -m pip uninstall modelscope -y
    python -m pip cache purge
    python -m pip install --upgrade pip
    python -m pip install --upgrade -r requirements.txt

    :: 安装平台特定依赖
    if defined PROCESSOR_ARCHITEW6432 (
        echo 安装Windows特定依赖...
        python -m pip install pywin32==306
    ) else (
        echo 安装Linux/Mac特定依赖...
        python -m pip install gpustat==1.1
    )
    echo ✅ 依赖安装完成
)

:: 设置环境变量
set "PYTHONPATH=%AI_SYSTEM_PATH%"
set "LOG_LEVEL=DEBUG"

:: 启动Python启动器
echo 启动AI系统...
# ---- tail of the quoted start_system.bat (text of the post, kept verbatim) ----
# python start_system.py %*
# if %errorlevel% neq 0 (
#     echo 错误: 系统启动失败 (错误码: %errorlevel%)
#     pause
#     exit /b %errorlevel%
# )
# echo 系统已成功启动
# pause
# ”还是start_system.py:“
"""Launcher that starts the AI system's services, monitors them and restarts them.

Each configured service is run as a child Python process whose output is
appended to a per-service log file under ``logs/``.
"""
import os
import sys
import subprocess
import time
import threading
import signal
import platform
import logging
import webbrowser
import http.client
import urllib.request
from pathlib import Path

# psutil only feeds the memory/CPU columns of the status table, so the
# launcher must still work when it is not installed.
try:
    import psutil
except ImportError:
    psutil = None

# python-dotenv is a convenience for loading a .env file; without it the
# launcher simply relies on the real process environment.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

# Load environment variables from .env (values in the file win over the
# existing environment because of override=True).
if load_dotenv is not None:
    load_dotenv(override=True)


class SystemStarter:
    """Start, monitor, restart and stop the configured service processes.

    ``self.services`` maps a service name to a record with the keys
    ``process``, ``config``, ``start_time``, ``log_file``, ``restart_count``
    and ``last_restart``.
    """

    def __init__(self):
        """Create the log directory, configure logging and define the services."""
        self.base_dir = Path(__file__).parent.resolve()
        self.log_dir = self.base_dir / "logs"
        self.log_dir.mkdir(exist_ok=True, parents=True)
        self.services = {}
        self.max_restarts = 5  # upper bound on automatic restarts per service
        self.setup_logger()

        # System configuration
        self.config = {
            "required_services": [
                {
                    "name": "API Server",
                    "script": "api_server.py",
                    "cwd": self.base_dir,
                    "port": 8000,
                    "health_check": "/health"
                },
                {
                    "name": "AI Core",
                    "script": "agent/agent_core.py",
                    "cwd": self.base_dir / "agent",
                    "env": {"AI_CORE_DEBUG": "1"}
                },
                {
                    "name": "Web UI",
                    "script": "web_ui/server.py",
                    "cwd": self.base_dir / "web_ui",
                    "port": 5000,
                    "open_browser": True
                }
            ],
            "monitor_interval": 10,  # seconds between monitoring passes
            "start_delay": 3,        # seconds to wait after starting a service
            "browser_delay": 5       # seconds to wait before opening the browser
        }

    def setup_logger(self):
        """Configure the ``SystemStarter`` logger with a file and a console handler.

        The level comes from the ``LOG_LEVEL`` environment variable. It is
        matched case-insensitively, and an unknown name falls back to INFO
        instead of raising from ``setLevel``.
        """
        self.logger = logging.getLogger('SystemStarter')
        requested = os.getenv("LOG_LEVEL", "INFO").strip().upper()
        level = logging.getLevelName(requested)
        if not isinstance(level, int):
            # getLevelName returns a "Level X" string for names it does not know.
            level = logging.INFO
        self.logger.setLevel(level)

        # File handler
        log_file = self.log_dir / "system.log"
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_formatter = logging.Formatter(
            '%(asctime)s [%(levelname)s] %(name)s - %(message)s'
        )
        file_handler.setFormatter(file_formatter)

        # Console handler
        console_handler = logging.StreamHandler()
        console_formatter = logging.Formatter(
            '[%(levelname)s] %(message)s'
        )
        console_handler.setFormatter(console_formatter)

        self.logger.addHandler(file_handler)
        self.logger.addHandler(console_handler)

        self.logger.info(f"日志文件: {log_file}")

    def start_service(self, service_config):
        """Start one service and return its ``Popen`` object, or None on failure.

        ``service_config`` must contain ``name`` and ``script``; ``cwd``,
        ``env`` and ``start_delay`` are optional. A relative ``script`` is
        resolved against the project root so that it is found regardless of
        the launcher's or the child's working directory.

        When the service already has a record (i.e. this is a restart), its
        ``restart_count`` is carried over so the restart limit keeps working.
        """
        service_name = service_config["name"]

        # Joining with an absolute script path yields that absolute path, so
        # both relative and absolute configuration values are supported.
        script_path = (self.base_dir / service_config["script"]).resolve()
        if not script_path.exists():
            self.logger.error(f"服务脚本不存在: {script_path}")
            return None

        cwd = Path(service_config.get("cwd", self.base_dir))
        log_file = self.log_dir / f"{service_name.lower().replace(' ', '_')}.log"

        # Build the child's environment
        env = os.environ.copy()
        env.update(service_config.get("env", {}))
        env["SERVICE_NAME"] = service_name

        popen_kwargs = {}
        if platform.system() == "Windows":
            # Required so that CTRL_BREAK_EVENT can later be delivered to
            # this child only (see _stop_process).
            popen_kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP

        self.logger.info(f"启动 {service_name} 服务...")

        try:
            with open(log_file, "a", encoding="utf-8") as log:
                process = subprocess.Popen(
                    # Same interpreter (and virtualenv) as the launcher.
                    [sys.executable, str(script_path)],
                    cwd=str(cwd),
                    stdout=log,
                    stderr=subprocess.STDOUT,
                    text=True,
                    encoding="utf-8",
                    env=env,
                    **popen_kwargs
                )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            self.logger.error(f"启动服务失败: {str(e)}")
            return None

        # Record the service, keeping the restart counter of a previous run.
        previous = self.services.get(service_name)
        now = time.time()
        self.services[service_name] = {
            "process": process,
            "config": service_config,
            "start_time": now,
            "log_file": log_file,
            "restart_count": previous["restart_count"] if previous else 0,
            "last_restart": now
        }

        # Give the service time to initialise
        time.sleep(service_config.get("start_delay", self.config["start_delay"]))
        return process

    def is_service_healthy(self, service_name):
        """Return True when the service's process runs and its health URL answers 200.

        Services without both ``port`` and ``health_check`` in their config
        are considered healthy as long as the process is running. Unknown
        service names are unhealthy.
        """
        service = self.services.get(service_name)
        if not service:
            return False

        # The process must still be running
        if service["process"].poll() is not None:
            return False

        port = service["config"].get("port")
        health_path = service["config"].get("health_check")
        if port and health_path:
            url = f"http://localhost:{port}{health_path}"
            try:
                with urllib.request.urlopen(url, timeout=2) as response:
                    return response.status == 200
            except (OSError, ValueError, http.client.HTTPException):
                # Connection refused, timeout, HTTP error status, malformed
                # response: all of them mean "not healthy".
                return False

        # No HTTP check configured: a running process is healthy
        return True

    def _stop_process(self, process, timeout=10):
        """Ask ``process`` to exit and kill it if it does not; return True if killed."""
        if platform.system() == "Windows":
            # CTRL_C_EVENT cannot be targeted at a process group, so the
            # children are started in their own group and get CTRL_BREAK.
            stop_signal = signal.CTRL_BREAK_EVENT
        else:
            stop_signal = signal.SIGINT
        try:
            process.send_signal(stop_signal)
            process.wait(timeout=timeout)
            return False
        except (OSError, subprocess.TimeoutExpired):
            # Signal could not be delivered, or the process ignored it.
            process.kill()
            process.wait()
            return True

    def _restart_service(self, service_name):
        """Log the failure of one service and restart it unless the limit is reached."""
        service = self.services[service_name]
        process = service["process"]
        exit_code = process.returncode if process.poll() is not None else -1
        runtime = time.time() - service["start_time"]

        self.logger.warning(
            f"服务 {service_name} 异常 | "
            f"运行时间: {runtime:.1f}秒 | "
            f"退出码: {exit_code} | "
            f"重启次数: {service['restart_count']}/{self.max_restarts}"
        )

        # Respect the restart limit
        if service['restart_count'] >= self.max_restarts:
            self.logger.error(f"❌ 服务 {service_name} 达到最大重启次数,停止尝试")
            return

        self.logger.info(f"🔄 重启 {service_name} 服务...")

        # A service can be unhealthy while its process is still alive (failed
        # HTTP check). Stop it first, otherwise two copies would run.
        if process.poll() is None:
            try:
                self._stop_process(process)
            except OSError as e:
                self.logger.error(f"❌ 停止服务失败: {str(e)}")

        new_process = self.start_service(service["config"])
        if new_process:
            # start_service stored a fresh record; update that one.
            record = self.services[service_name]
            record["restart_count"] += 1
            record["last_restart"] = time.time()
        else:
            self.logger.error(f"❌ 无法重启 {service_name} 服务")

    def monitor_services(self):
        """Loop forever: restart unhealthy services and print the status table.

        SIGINT and SIGTERM are routed to ``handle_exit``, which stops all
        services and exits the launcher.
        """
        self.logger.info("\n" + "=" * 50)
        self.logger.info(" AI系统服务监控中 (按 Ctrl+C 退出)")
        self.logger.info("=" * 50)

        # Install signal handlers
        signal.signal(signal.SIGINT, self.handle_exit)
        signal.signal(signal.SIGTERM, self.handle_exit)

        try:
            while True:
                # Iterate over a snapshot: restarts replace entries in the dict.
                for service_name in list(self.services):
                    if not self.is_service_healthy(service_name):
                        self._restart_service(service_name)

                self.display_status()
                time.sleep(self.config["monitor_interval"])

        except KeyboardInterrupt:
            self.logger.info("\n收到中断信号,正在关闭系统...")
            self.stop_all_services()

    def _resource_usage(self, pid):
        """Return ``(memory, cpu)`` display strings for ``pid``; "未知" when unavailable."""
        if psutil is None:
            return "未知", "未知"
        try:
            proc = psutil.Process(pid)
            memory = proc.memory_info().rss / 1024 ** 2  # MB
            cpu_percent = proc.cpu_percent(interval=0.1)
        except psutil.Error:
            # Process vanished or access was denied.
            return "未知", "未知"
        return f"{memory:.1f} MB", f"{cpu_percent:.1f}%"

    def display_status(self):
        """Clear the console and print one status line per service."""
        status_lines = []

        for service_name, service in self.services.items():
            process = service["process"]
            status = "运行中" if self.is_service_healthy(service_name) else "异常"
            pid = process.pid
            runtime = time.time() - service["start_time"]
            restarts = service["restart_count"]
            memory_str, cpu_str = self._resource_usage(pid)

            status_lines.append(
                f"{service_name: <15} | {status: <8} | PID: {pid: <6} | "
                f"运行时间: {runtime:.1f}s | 内存: {memory_str} | CPU: {cpu_str} | "
                f"重启: {restarts}/{self.max_restarts}"
            )

        # Clear the screen, then print the table
        if platform.system() == "Windows":
            os.system("cls")
        else:
            os.system("clear")

        print("=" * 90)
        print(f"服务状态监控 | 系统: {platform.system()} {platform.release()}")
        print("=" * 90)
        print("服务名称 | 状态 | PID | 运行时间 | 内存使用 | CPU使用 | 重启次数")
        print("-" * 90)
        for line in status_lines:
            print(line)
        print("\n按 Ctrl+C 停止系统")

    def handle_exit(self, signum, frame):
        """Signal handler: stop every service, then exit with status 0."""
        self.logger.info(f"收到信号 {signum},正在关闭系统...")
        self.stop_all_services()
        sys.exit(0)

    def stop_all_services(self):
        """Stop every running service, gracefully first and by force after 10 s."""
        print("\n" + "=" * 50)
        print("停止所有服务")
        print("=" * 50)

        for service_name, service in self.services.items():
            process = service["process"]
            if process.poll() is not None:
                print(f"✅ {service_name} 服务已停止")
                continue

            print(f"🛑 停止 {service_name} 服务 (PID: {process.pid})...")
            try:
                if self._stop_process(process):
                    print(f"⚠️ 强制终止 {service_name} 服务")
            except OSError as e:
                print(f"❌ 停止服务失败: {str(e)}")

    def open_service_browser(self, service_config):
        """After a delay, open the service's URL in a browser if it is healthy."""
        delay = service_config.get("browser_delay", self.config["browser_delay"])
        port = service_config.get("port", 5000)
        url = f"http://localhost:{port}"

        time.sleep(delay)

        if not self.is_service_healthy(service_config["name"]):
            self.logger.warning(f"服务 {service_config['name']} 未就绪,跳过打开浏览器")
            return

        self.logger.info(f"打开浏览器: {url}")
        try:
            webbrowser.open(url)
        except Exception as e:
            self.logger.error(f"打开浏览器失败: {str(e)}")

    def start(self):
        """Main entry point: print a banner, start all services, then monitor them."""
        print("=" * 50)
        print(f" AI系统启动器 v2.0 | Python {sys.version.split()[0]}")
        print("=" * 50)
        print(f"项目根目录: {self.base_dir}")
        print(f"操作系统: {platform.system()} {platform.release()}")
        print(f"处理器: {platform.processor()}")
        print(f"最大重启次数: {self.max_restarts}")
        print("-" * 50)

        # Start every configured service
        for service in self.config["required_services"]:
            self.start_service(service)

            # Open the browser in the background so startup is not blocked
            if service.get("open_browser", False):
                threading.Thread(
                    target=self.open_service_browser,
                    args=(service,),
                    daemon=True
                ).start()

        # Monitor until interrupted
        self.monitor_services()


if __name__ == "__main__":
    starter = SystemStarter()
    starter.start()
# ”还是新建一个文件?
08-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值