Python应用：跨语言批量文件执行器

跨语言批量执行器设计与实现

最新推荐文章于 2025-11-26 15:40:52 发布

原创最新推荐文章于 2025-11-26 15:40:52 发布 · 263 阅读

5 ·

CC 4.0 BY-SA版权

文章标签：

#python

新星杯·14天创作挑战营·第17期 10w+人浏览 644人参与

跨语言批量文件执行器，旨在打破传统批量执行的限制，支持多种编程语言和脚本类型的统一执行管理。这个工具特别适合需要批量处理多语言脚本的开发运维场景，解决了传统方式需要针对不同语言分别编写执行逻辑的痛点。

核心价值

统一管理 - 用一个工具管理多种语言脚本
性能优化 - 并发执行大幅提升效率
安全可靠 - 多层防护防止误操作
灵活扩展 - 易于添加新的语言支持
监控追溯 - 完整的执行历史记录

使用场景

1. CI/CD流水线，自动化执行测试脚本、构建脚本等。

python executor.py /path/to/scripts --strategy parallel --workers 10

2. 数据处理工作流，顺序执行数据清洗、转换、分析脚本。

python executor.py /path/to/pipeline --strategy sequential --report pipeline_report.json

3. 多语言项目构建，统一执行混合语言项目的构建脚本。

python executor.py /project --recursive --report build_report.json

4. 安全审计，在安全模式下批量检查可疑脚本。

python executor.py /scripts --config security_config.json

主要代码如下：

import os
import sys
import subprocess
import threading
import time
from pathlib import Path
from typing import List, Dict, Callable, Any
import logging
import argparse
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

class _ProcessOutput(str):
    """Captured output of a child process, tagged with how the process ended.

    Instances behave exactly like ordinary strings, so handlers can keep
    returning ``str``; the extra attributes let the executor tell a clean
    exit from a failure.

    Attributes:
        returncode: Exit status of the process (0 when it timed out).
        timed_out: True when the process was stopped by the timeout.
    """

    def __new__(cls, text: str, returncode: int = 0, timed_out: bool = False):
        instance = super().__new__(cls, text)
        instance.returncode = returncode
        instance.timed_out = timed_out
        return instance


class BoundaryBreakingBatchExecutor:
    """Batch executor for script files written in different languages.

    Files are discovered with ``scan_directory``, run with ``execute_files``
    (sequentially, in parallel, or in priority order) and summarised with
    ``generate_report``. Each supported extension maps to a handler in
    ``supported_extensions``; a handler takes a ``Path`` and returns the
    combined stdout/stderr text of the run.
    """

    # Text reported when a script exceeds the configured timeout.
    _TIMEOUT_MESSAGE = "执行超时"

    # Scheduling rank per extension for the "priority" strategy (lower first).
    _EXTENSION_PRIORITY = {
        '.py': 1, '.sh': 2, '.bat': 3, '.ps1': 4,
        '.js': 5, '.rb': 6, '.php': 7, '.pl': 8, '.r': 9, '.java': 10,
    }

    def __init__(self, config_file: str = None):
        """Create an executor.

        Args:
            config_file: Optional path to a JSON file whose keys override the
                default configuration (see ``_load_config``).
        """
        self.logger = self._setup_logging()
        # One record per executed file, appended by ``_record_execution``.
        self.execution_history: List[Dict[str, Any]] = []
        self.supported_extensions: Dict[str, Callable[[Path], str]] = {
            '.py': self._execute_python,
            '.sh': self._execute_shell,
            '.bat': self._execute_batch,
            '.ps1': self._execute_powershell,
            '.js': self._execute_javascript,
            '.rb': self._execute_ruby,
            '.php': self._execute_php,
            '.pl': self._execute_perl,
            '.r': self._execute_r,
            '.java': self._execute_java,
        }

        self.config = self._load_config(config_file)

    def _setup_logging(self) -> logging.Logger:
        """Return the shared logger, attaching a stream handler only once."""
        logger = logging.getLogger('BoundaryBreakingExecutor')
        logger.setLevel(logging.INFO)

        # The logger is process-wide; guard against duplicate handlers when
        # several executors are created.
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            ))
            logger.addHandler(handler)

        return logger

    def _load_config(self, config_file: str) -> Dict:
        """Build the configuration from defaults plus an optional JSON file.

        Args:
            config_file: Path to a JSON file, or None / a missing path to use
                the defaults only.

        Returns:
            The effective configuration dictionary. If the file cannot be
            read or parsed, a warning is logged and the defaults are kept.
        """
        config = {
            "max_workers": 5,
            "timeout": 300,
            "safe_mode": True,
            "allowed_directories": [],
            "blacklisted_commands": ["rm -rf", "format", "del *"],
            "environment_variables": {},
            "execution_strategy": "parallel"  # parallel, sequential, priority
        }

        if config_file and os.path.exists(config_file):
            try:
                with open(config_file, 'r', encoding='utf-8') as f:
                    user_config = json.load(f)
                if not isinstance(user_config, dict):
                    raise TypeError("配置文件顶层必须是 JSON 对象")
                config.update(user_config)
            except (OSError, ValueError, TypeError) as e:
                # ValueError covers both JSON syntax errors and bad encodings.
                self.logger.warning(f"配置文件加载失败，使用默认配置: {e}")

        return config

    def _is_allowed_directory(self, directory: str) -> bool:
        """Tell whether *directory* is an allowed directory or lies inside one.

        Paths are resolved before comparing, so trailing separators, relative
        paths and symlinks do not defeat the allow-list.
        """
        target = Path(directory).resolve()
        for allowed in self.config["allowed_directories"]:
            allowed_path = Path(allowed).resolve()
            if target == allowed_path or allowed_path in target.parents:
                return True
        return False

    def scan_directory(self, directory: str, recursive: bool = True) -> List[Path]:
        """Collect the runnable files found in *directory*.

        Args:
            directory: Directory to scan.
            recursive: Descend into sub-directories when True; otherwise look
                only at the directory's direct entries.

        Returns:
            Sorted list of paths accepted by ``_is_executable_file``.

        Raises:
            FileNotFoundError: *directory* does not exist.
            PermissionError: safe mode is on and *directory* is neither an
                entry of ``allowed_directories`` nor located inside one.
        """
        if not os.path.exists(directory):
            raise FileNotFoundError(f"目录不存在: {directory}")

        if self.config["safe_mode"] and not self._is_allowed_directory(directory):
            raise PermissionError(f"安全模式：目录 {directory} 不在允许列表中")

        if recursive:
            candidates = [
                Path(root) / name
                for root, _dirs, names in os.walk(directory)
                for name in names
            ]
        else:
            entries = (Path(directory) / item for item in os.listdir(directory))
            candidates = [entry for entry in entries if entry.is_file()]

        return sorted(path for path in candidates if self._is_executable_file(path))

    def _is_executable_file(self, file_path: Path) -> bool:
        """Return True for a supported extension or, off Windows, an executable file."""
        if file_path.suffix.lower() in self.supported_extensions:
            return True

        # On non-Windows systems also accept anything carrying the execute bit.
        if os.name != 'nt':
            return os.access(file_path, os.X_OK)

        return False

    def execute_files(self, file_paths: List[Path],
                     execution_strategy: str = None,
                     max_workers: int = None) -> Dict[Path, Dict]:
        """Run every file in *file_paths* and return the per-file results.

        Args:
            file_paths: Files to run.
            execution_strategy: "sequential", "parallel" or "priority";
                falls back to the configured strategy when omitted.
            max_workers: Thread count for the concurrent strategies; falls
                back to the configured value when omitted.

        Returns:
            Mapping of path to a result dict with the keys ``success``,
            ``output``, ``error`` and ``execution_time``.

        Raises:
            ValueError: the strategy name is not recognised.
        """
        strategy = execution_strategy or self.config["execution_strategy"]
        workers = max_workers or self.config["max_workers"]

        self.logger.info(f"开始执行 {len(file_paths)} 个文件，策略: {strategy}")

        if strategy == "sequential":
            results = self._execute_sequential(file_paths)
        elif strategy == "parallel":
            results = self._execute_parallel(file_paths, workers)
        elif strategy == "priority":
            results = self._execute_with_priority(file_paths, workers)
        else:
            raise ValueError(f"不支持的执行策略: {strategy}")

        self._log_execution_summary(results)
        return results

    def _execute_sequential(self, file_paths: List[Path]) -> Dict[Path, Dict]:
        """Run the files one after another, in the given order."""
        return {path: self._execute_single_file(path) for path in file_paths}

    def _execute_parallel(self, file_paths: List[Path], max_workers: int) -> Dict[Path, Dict]:
        """Run the files on a thread pool.

        Results are gathered in submission order, so the returned mapping has
        the same order as *file_paths* regardless of which file finishes first.
        """
        results: Dict[Path, Dict] = {}
        if not file_paths:
            return results

        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            submitted = [
                (path, pool.submit(self._execute_single_file, path))
                for path in file_paths
            ]
            for path, future in submitted:
                try:
                    results[path] = future.result()
                except Exception as e:
                    results[path] = self._make_result(False, error=str(e))

        return results

    def _execute_with_priority(self, file_paths: List[Path], max_workers: int) -> Dict[Path, Dict]:
        """Submit the files to the thread pool in priority order."""
        return self._execute_parallel(self._prioritize_files(file_paths), max_workers)

    def _prioritize_files(self, file_paths: List[Path]) -> List[Path]:
        """Sort files by extension rank, then by size (smaller first)."""
        def get_priority(file_path: Path) -> tuple:
            rank = self._EXTENSION_PRIORITY.get(file_path.suffix.lower(), 99)
            try:
                size = file_path.stat().st_size
            except OSError:
                # The file vanished or is unreadable; it will fail when run,
                # but it must not break the ordering of the others.
                size = 0
            return (rank, size)

        return sorted(file_paths, key=get_priority)

    @staticmethod
    def _make_result(success: bool, output: str = "", error: str = "",
                     execution_time: float = 0) -> Dict[str, Any]:
        """Assemble the result dictionary returned for one file."""
        return {
            "success": success,
            "output": output,
            "error": error,
            "execution_time": execution_time,
        }

    def _describe_failure(self, output: Any) -> str:
        """Return an error message for a failed run, or "" when it succeeded.

        Plain strings (e.g. from user-supplied handlers) carry no exit
        status and are treated as successful.
        """
        if getattr(output, "timed_out", False):
            return self._TIMEOUT_MESSAGE
        returncode = getattr(output, "returncode", 0)
        if returncode != 0:
            return f"进程退出码: {returncode}"
        return ""

    def _execute_single_file(self, file_path: Path) -> Dict[str, Any]:
        """Run one file and describe the outcome.

        The run counts as successful only when the process exits with status
        0 within the timeout. Files rejected by the security check or without
        a handler are reported as failures without being run. Any exception
        is caught, logged and reported as a failure.
        """
        start_time = time.time()

        try:
            if not self._security_check(file_path):
                return self._make_result(False, error="安全检查失败")

            handler = self.supported_extensions.get(file_path.suffix.lower())
            if not handler:
                return self._make_result(
                    False, error=f"不支持的文件类型: {file_path.suffix}"
                )

            output = handler(file_path)
            execution_time = time.time() - start_time

            error = self._describe_failure(output)
            if isinstance(output, _ProcessOutput):
                # Hand callers a plain string, not the internal subclass.
                output = str(output)

            self._record_execution(file_path, output, execution_time, not error)
            return self._make_result(not error, output, error, execution_time)

        except Exception as e:
            execution_time = time.time() - start_time
            self.logger.error(f"执行文件 {file_path} 时出错: {e}")
            self._record_execution(file_path, str(e), execution_time, False)
            return self._make_result(False, error=str(e), execution_time=execution_time)

    def _security_check(self, file_path: Path) -> bool:
        """Reject files whose text contains a blacklisted command.

        The check is a case-insensitive substring search, so a short entry
        such as "format" matches wherever that text occurs in the file. It is
        skipped entirely when safe mode is off. A file that cannot be read is
        let through (best effort) after logging a warning.
        """
        if not self.config["safe_mode"]:
            return True

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read().lower()
        except OSError as e:
            self.logger.warning(f"无法读取文件 {file_path} 进行安全检查: {e}")
            return True

        for banned_cmd in self.config["blacklisted_commands"]:
            if banned_cmd.lower() in content:
                self.logger.warning(f"文件 {file_path} 包含禁止的命令: {banned_cmd}")
                return False

        return True

    def _record_execution(self, file_path: Path, result: str, execution_time: float,
                          success: bool = True):
        """Append one entry to ``execution_history``.

        Args:
            file_path: File that was run.
            result: Output (or error text) of the run; only the first 100
                characters are kept as a preview.
            execution_time: Duration of the run in seconds.
            success: Whether the run succeeded.
        """
        preview = result[:100] + "..." if len(result) > 100 else result
        self.execution_history.append({
            "file_path": str(file_path),
            "timestamp": time.time(),
            "execution_time": execution_time,
            "success": success,
            "result_preview": preview,
        })

    def _spawn(self, command: List[str]) -> subprocess.CompletedProcess:
        """Run *command* with the configured timeout and environment.

        Output is captured as text; bytes that cannot be decoded are
        replaced instead of aborting the run.

        Raises:
            subprocess.TimeoutExpired: the process exceeded the timeout.
            OSError: the program could not be started (e.g. not installed).
        """
        return subprocess.run(
            command,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=self.config["timeout"],
            env={**os.environ, **self.config["environment_variables"]},
        )

    def _run_command(self, command: List[str]) -> str:
        """Run *command* and return stdout followed by stderr.

        A timeout yields the timeout message instead of raising. The
        returned string also carries ``returncode`` and ``timed_out``.
        """
        try:
            completed = self._spawn(command)
        except subprocess.TimeoutExpired:
            return _ProcessOutput(self._TIMEOUT_MESSAGE, timed_out=True)
        return _ProcessOutput(completed.stdout + completed.stderr, completed.returncode)

    # Handlers, one per supported file type.
    def _execute_python(self, file_path: Path) -> str:
        """Run a Python script with the interpreter running this program."""
        return self._run_command([sys.executable, str(file_path)])

    def _execute_shell(self, file_path: Path) -> str:
        """Run a shell script with ``bash``."""
        return self._run_command(['bash', str(file_path)])

    def _execute_batch(self, file_path: Path) -> str:
        """Run a batch file with ``cmd /c``."""
        return self._run_command(['cmd', '/c', str(file_path)])

    def _execute_powershell(self, file_path: Path) -> str:
        """Run a PowerShell script with ``powershell -File``."""
        return self._run_command(['powershell', '-File', str(file_path)])

    def _execute_javascript(self, file_path: Path) -> str:
        """Run a JavaScript file with ``node``."""
        return self._run_command(['node', str(file_path)])

    def _execute_ruby(self, file_path: Path) -> str:
        """Run a Ruby script with ``ruby``."""
        return self._run_command(['ruby', str(file_path)])

    def _execute_php(self, file_path: Path) -> str:
        """Run a PHP script with ``php``."""
        return self._run_command(['php', str(file_path)])

    def _execute_perl(self, file_path: Path) -> str:
        """Run a Perl script with ``perl``."""
        return self._run_command(['perl', str(file_path)])

    def _execute_r(self, file_path: Path) -> str:
        """Run an R script with ``Rscript``."""
        return self._run_command(['Rscript', str(file_path)])

    def _execute_java(self, file_path: Path) -> str:
        """Compile a Java source file with ``javac`` and run the class.

        The class is assumed to be named after the file and is looked up in
        the file's own directory. Compilation uses the same timeout and
        environment as the run; a compile error is returned as the output
        and marks the run as failed.
        """
        try:
            compiled = self._spawn(['javac', str(file_path)])
        except subprocess.TimeoutExpired:
            return _ProcessOutput(self._TIMEOUT_MESSAGE, timed_out=True)

        if compiled.returncode != 0:
            return _ProcessOutput(f"编译错误: {compiled.stderr}", compiled.returncode)

        return self._run_command(
            ['java', '-cp', str(file_path.parent), file_path.stem]
        )

    def _log_execution_summary(self, results: Dict[Path, Dict]):
        """Log success/failure counts, the summed run time and failed files."""
        successful = sum(1 for r in results.values() if r["success"])
        failed = len(results) - successful
        # Sum of the individual durations, not wall-clock time of the batch.
        total_time = sum(r["execution_time"] for r in results.values())

        self.logger.info(f"执行完成: 成功 {successful}, 失败 {failed}, 总耗时: {total_time:.2f}秒")

        if failed > 0:
            failed_files = [str(path) for path, result in results.items() if not result["success"]]
            self.logger.warning(f"失败的文件: {failed_files}")

    def generate_report(self, output_file: str = None) -> str:
        """Summarise the execution history as JSON.

        Args:
            output_file: When given, the report is also written to this path.

        Returns:
            The report as a JSON string containing the total number of
            recorded runs, the ten most recent records and the counts of
            successful and failed runs.
        """
        successful = sum(1 for record in self.execution_history if record.get("success"))
        report = {
            "timestamp": time.time(),
            "total_executions": len(self.execution_history),
            "recent_executions": self.execution_history[-10:],
            "summary": {
                "successful": successful,
                "failed": len(self.execution_history) - successful,
            }
        }

        report_json = json.dumps(report, indent=2, ensure_ascii=False)

        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report_json)

        return report_json

def main():
    """Command-line entry point.

    Parses the arguments, scans the given directory, runs the files that
    were found and prints the JSON report (or saves it when ``--report`` is
    given). Any error raised along the way is printed to stderr and the
    process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='批量文件执行器')
    parser.add_argument('directory', help='要扫描的目录路径')
    parser.add_argument('--config', '-c', help='配置文件路径')
    parser.add_argument('--strategy', '-s',
                       choices=['sequential', 'parallel', 'priority'],
                       help='执行策略')
    parser.add_argument('--workers', '-w', type=int, help='最大工作线程数')
    parser.add_argument('--recursive', '-r', action='store_true',
                       help='递归扫描子目录')
    parser.add_argument('--report', help='生成报告文件路径')

    args = parser.parse_args()

    executor = BoundaryBreakingBatchExecutor(args.config)

    try:
        files = executor.scan_directory(args.directory, args.recursive)
        print(f"找到 {len(files)} 个可执行文件")

        # Per-file results are logged by the executor; the report below is
        # built from its execution history.
        executor.execute_files(
            files,
            execution_strategy=args.strategy,
            max_workers=args.workers
        )

        # With no --report path the report is only returned, not written.
        report = executor.generate_report(args.report)
        if args.report:
            print(f"报告已保存到: {args.report}")
        else:
            print("执行报告:", report)

    except Exception as e:
        # Diagnostics belong on stderr so stdout carries only the report.
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()