命令行启动的线程模型优化

本文链接：https://blog.youkuaiyun.com/Tekim/article/details/115920593

博客内容介绍了对一个基于Java的命令行线程模型进行优化的过程，涉及到CPU核心池的使用、线程绑定、阻塞队列和线程池的调度。作者详细解析了CommandExecutor如何通过CommandThread执行命令，并探讨了如何减少线程层级以提升效率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

熟悉代码

腾哥给了些任务

通读完judger的核心代码，跟黄学长联系修改完OLE，就开始尝试研究命令行启动的线程模型优化。(因为judger的代码在github上是公开的，so我这里就直接贴腾哥的代码啦)

@author：张腾学长
/**
 * Runs {@link Command}s on a fixed-size thread pool and hands each running
 * command an exclusive CPU core number taken from a shared pool.
 *
 * <p>The thread pool has as many threads as there are configured cores, and
 * results are retrieved in completion order through a {@link CompletionService}.
 */
public class CommandExecutor {

    /**
     * Core numbers that are currently free. A task removes one before it runs
     * and appends it again when it finishes.
     */
    private final BlockingQueue<Integer> cpuPool;

    /** Completion service wrapping the worker thread pool. */
    private final CompletionService<CommandExecuteResult> threadPool;

    /**
     * Submits a command for asynchronous execution.
     *
     * @param command the command to run
     */
    public void submit(Command command) {
        threadPool.submit(new CommandThread(command, cpuPool));
    }

    /**
     * Blocks until some submitted command has finished and returns its result.
     * Results arrive in completion order, not submission order.
     *
     * @return the result of a finished command
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException   if the command terminated with an exception
     */
    public CommandExecuteResult take() throws InterruptedException, ExecutionException {
        return threadPool.take().get();
    }

    /**
     * Builds the CPU pool from {@code CpuConfig.getCpuSet()} and a thread pool
     * with one thread per configured core.
     */
    public CommandExecutor() {
        // Initialise the CPU pool with the core numbers provided by CpuConfig.
        cpuPool = new LinkedBlockingDeque<>(CpuConfig.getCpuSet());
        // Fixed-size pool; when the 1024-slot work queue is full, CallerRunsPolicy
        // makes the submitting thread execute the task itself.
        threadPool = new ExecutorCompletionService<>(new ThreadPoolExecutor(
                cpuPool.size(),
                cpuPool.size(),
                0,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(1024),
                new ThreadPoolExecutor.CallerRunsPolicy())
        );
        log.info("init threadPool {}", cpuPool.size());
    }


    /**
     * Task wrapper executed by the thread pool: acquires a core, runs the
     * command on it and gives the core back.
     */
    private static class CommandThread implements Callable<CommandExecuteResult> {

        private final Command command;

        private final BlockingQueue<Integer> cpuPool;

        public CommandThread(Command command, BlockingQueue<Integer> cpuPool) {
            this.command = command;
            this.cpuPool = cpuPool;
        }

        /**
         * Waits for a free core, runs the command bound to it and releases the core.
         *
         * @return whatever {@code command.run(coreNo)} returns
         * @throws Exception if waiting for a core is interrupted or the command fails
         */
        @Override
        public CommandExecuteResult call() throws Exception {
            log.info("exec {}", command.toString());
            // Block until a core is free. A non-blocking poll() can come back empty
            // when the submitting thread runs a task via CallerRunsPolicy while all
            // workers are busy; the task would then fall back to core 0, which may
            // already be in use or may not be part of the configured set at all.
            final Integer coreNo = cpuPool.take();
            log.info("cpu consume {}", coreNo);
            try {
                return command.run(coreNo);
            } finally {
                log.info("cpu release {}", coreNo);
                cpuPool.offer(coreNo);
            }
        }
    }
}

cpuPool是cpu核心池，通过读取本机的cpu配置文件获得，里面是一些离散的核心编号，所以用双端队列储存：从队头取，运行结束后放回到队列尾部。

@author：张腾学长
/**
 * Launches the sandbox binary through {@code sudo taskset -c <core>} and parses
 * its output into a {@link SandboxResultDTO}.
 */
public class SandboxRunner {

    private static final String SANDBOX_PATH = "/usr/bin/sandbox";

    /**
     * Runs the sandbox on the first core returned by {@code CpuConfig.getCpuSet()}.
     *
     * @param cwd  working directory passed on to {@code ProcessUtils.cmd}
     * @param args sandbox arguments; {@code null} entries and entries with a
     *             {@code null} value are skipped
     * @return the parsed sandbox result
     * @throws SystemErrorException if the sandbox process exits with a non-zero code
     */
    public static SandboxResultDTO run(String cwd, Argument... args) throws SystemErrorException {
        return run(CpuConfig.getCpuSet().iterator().next(), cwd, args);
    }

    /**
     * Runs the sandbox pinned to the given core.
     *
     * @param coreNo core number handed to {@code taskset -c}
     * @param cwd    working directory passed on to {@code ProcessUtils.cmd}
     * @param args   sandbox arguments; {@code null} entries and entries with a
     *               {@code null} value are skipped
     * @return the sandbox output parsed as JSON into a {@link SandboxResultDTO}
     * @throws SystemErrorException if the sandbox process exits with a non-zero code
     */
    public static SandboxResultDTO run(int coreNo, String cwd, Argument... args) throws SystemErrorException {
        List<String> commandList = new ArrayList<>();

        // Prefix: bind the sandbox process to a single core.
        commandList.add("sudo");
        commandList.add("taskset");
        commandList.add("-c");
        commandList.add(String.valueOf(coreNo));
        commandList.add(SANDBOX_PATH);

        // NOTE(review): String values are only wrapped in double quotes. A value that
        // itself contains a double quote, a backtick or "$(...)" can break out of the
        // quoting if the command is later interpreted by a shell. Confirm that all
        // values come from trusted configuration, or escape them here.
        for (Argument arg : args) {
            if (arg == null || arg.value == null) {
                continue;
            }
            if (arg.key.clz == String.class) {
                commandList.add(String.format("--%s=\"%s\"", arg.key, arg.value));
            } else if (arg.key.clz == String[].class) {
                // Array-valued options are emitted once per element.
                String[] values = (String[]) arg.key.clz.cast(arg.value);
                for (String value : values) {
                    commandList.add(String.format("--%s=\"%s\"", arg.key, value));
                }
            } else {
                commandList.add(String.format("--%s=%s", arg.key, arg.value));
            }
        }

        ProcessUtils.ProcessStatus processStatus = ProcessUtils.cmd(cwd, commandList.toArray(new String[0]));
        // Log the output before checking the exit code, so that whatever the sandbox
        // printed is still available when it fails.
        log.info("\nsandbox output: {}", processStatus.output);
        if (processStatus.exitCode != 0) {
            throw new SystemErrorException(String.format("Sandbox exits abnormally: %d", processStatus.exitCode));
        }
        return JSON.parseObject(processStatus.output, SandboxResultDTO.class);
    }
}

这边是通过命令行用taskset把沙盒进程绑定到对应的运行核心；cwd对应的是命令执行时的工作目录（评测时传入的是workspaceDir）。

/**
 * Runs a command line through {@code /bin/sh -c} and waits up to 120 seconds for it.
 *
 * <p>The command parts are joined with single spaces into one shell string, and
 * stderr is merged into stdout. A {@code Worker} is started for the process, and
 * its {@code ProcessStatus} is returned once the worker has finished.
 *
 * @param pwd      working directory for the process; when {@code null} or blank,
 *                 no directory is set on the process builder
 * @param commands command and arguments, joined with spaces before execution
 * @return the status object obtained from the worker
 * @throws SystemErrorException if the process cannot be started, does not finish
 *                              within 120 seconds, or the waiting thread is interrupted
 */
public static ProcessStatus cmd(String pwd, final String... commands) throws SystemErrorException {
    log.info("Run CommandLine\npwd: {}\ncommand: {}\n", pwd, String.join(" ", commands));
    Process process = null;
    Worker worker = null;
    try {
        // NOTE(review): the parts are concatenated into a single shell string, so any
        // shell metacharacter inside an argument is interpreted by /bin/sh.
        process = new ProcessBuilder("/bin/sh", "-c", String.join(" ", commands))
                .directory(Optional.ofNullable(pwd).filter(StringUtils::isNotBlank).map(File::new).orElse(null))
                .redirectErrorStream(true)
                .start();

        worker = new Worker(process);
        worker.start();
        ProcessStatus ps = worker.getProcessStatus();
        worker.join(120000);    /* run for at most 120s */
        if (ps.exitCode == ProcessStatus.CODE_STARTED) {
            // Exit code was never updated: the process has not finished in time.
            worker.interrupt();
            throw new SystemErrorException("Timeout");
        }
        return ps;
    } catch (IOException e) {
        // Usually the process could not be started at all; worker is then still null.
        log.warn("Failed to run command line", e);
        if (worker != null) {
            worker.interrupt();
        }
        throw new SystemErrorException("Failed to start process: " + e.getMessage());
    } catch (InterruptedException e) {
        // Cancelled by another thread while waiting for the worker.
        if (worker != null) {
            worker.interrupt();
        }
        // Restore the interrupt flag so that callers can still observe the cancellation.
        Thread.currentThread().interrupt();
        throw new SystemErrorException("Canceled by other thread");
    } finally {
        // process stays null when start() itself failed.
        if (process != null) {
            process.destroy();
        }
    }
}

还是没太明白是怎么多核处理的，这不就只绑定了一个核心吗？明天研究一下内存分配。

好了搞清楚内部的运行规律了。
首先提交IOJudgeCommand类的任务给commandExecutor

// Queue one judge task per checkpoint on the command executor's thread pool.
        for (int idx = 0, total = checkpoints.size(); idx < total; ++idx) {
            final String cpId = String.valueOf(checkpoints.get(idx).getCheckpointId());

            // Test data (<id>.in / <id>.ans) lives in the data directory; the
            // program's own output (<id>.out) goes to the user output directory.
            final String inFile = Paths.get(PathConfig.DATA_DIR, cpId + ".in").toString();
            final String ansFile = Paths.get(PathConfig.DATA_DIR, cpId + ".ans").toString();
            final String outFile = Paths.get(userOutputDir, cpId + ".out").toString();

            final Integer cpScore = checkpoints.get(idx).getCheckpointScore();

            // IOJudgeCommand implements Command; its run method holds the actual
            // judging logic and is executed asynchronously by the executor.
            commandExecutor.submit(new IOJudgeCommand(
                    submissionId, idx, cpScore,
                    timeLimit, memoryLimit, outputLimit,
                    inFile, outFile, ansFile,
                    runConfig));
        }

而CommandExecutor 内部有一个阻塞队列和线程池
通过线程池进行任务的提交和返回，提交的是CommandThread类，内部执行call函数运行command的run方法，

/**
 * Runs {@link Command}s on a fixed-size thread pool and hands each running
 * command an exclusive CPU core number taken from a shared blocking queue.
 */
@Slf4j
@Component
public class CommandExecutor {

    // CPU pool; in this class it is a blocking queue of free core numbers.
    private final BlockingQueue<Integer> cpuPool;

    // Thread pool, wrapped in a completion service.
    private final CompletionService<CommandExecuteResult> threadPool;

    /**
     * Submits an asynchronous task. The command is wrapped in a new
     * {@code CommandThread} that receives a reference to the shared CPU pool.
     *
     * @param command the command to run
     */
    public void submit(Command command) {
        threadPool.submit(new CommandThread(command, cpuPool));
    }

    /**
     * Blocks until some submitted command has finished and returns its result.
     * Results arrive in completion order, not submission order.
     *
     * @return the result of a finished command
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException   if the command terminated with an exception
     */
    public CommandExecuteResult take() throws InterruptedException, ExecutionException {
        return threadPool.take().get();
    }

    /**
     * Builds the CPU pool from {@code CpuConfig.getCpuSet()} and a thread pool
     * with one thread per configured core.
     */
    public CommandExecutor() {
        // Initialise the CPU pool with the core numbers provided by CpuConfig.
        cpuPool = new LinkedBlockingDeque<>(CpuConfig.getCpuSet());
        // Fixed-size pool; when the 1024-slot work queue is full, CallerRunsPolicy
        // makes the submitting thread execute the task itself.
        threadPool = new ExecutorCompletionService<>(new ThreadPoolExecutor(
                cpuPool.size(),
                cpuPool.size(),
                0,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(1024),
                new ThreadPoolExecutor.CallerRunsPolicy())
        );
        log.info("init threadPool {}", cpuPool.size());
    }


    /**
     * Task wrapper executed by the thread pool: acquires a core, runs the
     * command on it and gives the core back.
     */
    private static class CommandThread implements Callable<CommandExecuteResult> {

        private final Command command;

        private final BlockingQueue<Integer> cpuPool;

        public CommandThread(Command command, BlockingQueue<Integer> cpuPool) {
            this.command = command;
            this.cpuPool = cpuPool;
        }

        /**
         * Waits for a free core, runs the command bound to it and releases the core.
         *
         * @return whatever {@code command.run(coreNo)} returns
         * @throws Exception if waiting for a core is interrupted or the command fails
         */
        @Override
        public CommandExecuteResult call() throws Exception {
            log.info("exec {}", command.toString());
            // Block until a core is free. A non-blocking poll() can come back empty
            // when the submitting thread runs a task via CallerRunsPolicy while all
            // workers are busy; the task would then fall back to core 0, which may
            // already be in use or may not be part of the configured set at all.
            final Integer coreNo = cpuPool.take();
            log.info("cpu consume {}", coreNo);
            try {
                // The wrapped command's run method is where the real work happens:
                // the pool invokes call(), and call() delegates to command.run().
                return command.run(coreNo);
            } finally {
                log.info("cpu release {}", coreNo);
                cpuPool.offer(coreNo);
            }
        }
    }
}

而我们主要的cmd操作都在command的run 方法之中，

/**
 * Judge task for a single checkpoint: runs the submission inside the sandbox,
 * then compares the produced output file with the expected answer file.
 */
private class IOJudgeCommand implements Command {

    private final long submissionId;
    private final int caseNo;
    private final int score;
    private final String outputPath;
    private final String answerPath;

    /** Sandbox arguments, assembled once in the constructor. */
    private final Argument[] runCommand;

    /**
     * @param submissionId id of the submission being judged
     * @param caseNo       index of the checkpoint within the submission
     * @param score        score awarded when the checkpoint is accepted
     * @param timeLimit    time limit, scaled by the run config's CPU/real time factors
     * @param memoryLimit  memory limit, scaled by the run config's memory factor and 1024
     * @param outputLimit  output limit, scaled by the run config's memory factor and 1024
     * @param inputPath    checkpoint input file
     * @param outputPath   file the program's output is written to
     * @param answerPath   expected answer file
     * @param runConfig    run section of the judge template (command, envs, factors)
     */
    IOJudgeCommand(long submissionId, int caseNo, int score, int timeLimit, int memoryLimit, int outputLimit, String inputPath,
                   String outputPath, String answerPath, JudgeTemplateConfigDTO.TemplateConfig.Run runConfig) throws SystemErrorException {
        this.submissionId = submissionId;
        this.caseNo = caseNo;
        this.score = score;
        this.outputPath = outputPath;
        this.answerPath = answerPath;

        // Split the configured run command on whitespace: first token is the
        // executable, the remaining tokens are its arguments.
        String[] _commands = IOSubmissionHandler.WHITESPACE_PATTERN.split(runConfig.getCommand().trim());

        runCommand = ArrayUtils.toArray(
                new Argument(SandboxArgument.MAX_CPU_TIME, timeLimit * runConfig.getMaxCpuTimeFactor()),
                new Argument(SandboxArgument.MAX_REAL_TIME, timeLimit * runConfig.getMaxRealTimeFactor()),
                new Argument(SandboxArgument.MAX_MEMORY, memoryLimit * runConfig.getMaxMemoryFactor() * 1024L),
                // NOTE(review): the output limit is scaled by the *memory* factor; this
                // looks like a copy-paste from the line above. Confirm whether a
                // dedicated output factor should be used.
                new Argument(SandboxArgument.MAX_OUTPUT_SIZE, outputLimit * runConfig.getMaxMemoryFactor() * 1024L),
                new Argument(SandboxArgument.MAX_STACK, 128L * 1024 * 1024),
                new Argument(SandboxArgument.EXE_PATH, _commands[0]),
                new Argument(SandboxArgument.EXE_ARGS, Arrays.copyOfRange(_commands, 1, _commands.length)),
                new Argument(SandboxArgument.EXE_ENVS, runConfig.getEnvs()),
                new Argument(SandboxArgument.INPUT_PATH, inputPath),
                new Argument(SandboxArgument.OUTPUT_PATH, outputPath),
                new Argument(SandboxArgument.UID, PathConfig.NOBODY_UID),
                new Argument(SandboxArgument.GID, PathConfig.NOBODY_GID)
        );
    }

    /**
     * Runs the checkpoint on the given core. This method is invoked from
     * {@code CommandThread.call()}, which in turn is scheduled by the thread pool.
     *
     * @param coreNo core number the sandbox process is bound to
     * @return the checkpoint result; a {@code SystemErrorException} raised while
     *         judging is converted into an SE result instead of being propagated
     */
    @Override
    public CommandExecuteResult<CheckpointResultMessageDTO> run(int coreNo) {
        CommandExecuteResult<CheckpointResultMessageDTO> commandExecuteResult = null;
        try {
            // Run the program inside the sandbox and collect its result.
            SandboxResultDTO judgeResult = SandboxRunner.run(coreNo, workspaceDir, runCommand);
            if (SandboxResult.SYSTEM_ERROR.equals(judgeResult.getResult())) {
                throw new SystemErrorException(String.format("Sandbox Internal Error #%d, signal #%d", judgeResult.getError(), judgeResult.getSignal()));
            } else if (SandboxResult.SUCCESS.equals(judgeResult.getResult())) {
                // SUCCESS only means the program ran within its limits; the file
                // comparison below decides between AC and WA.
                SubmissionJudgeResult result = check();
                commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
                        submissionId, caseNo, result.code,
                        SubmissionJudgeResult.AC.code == result.code ? score : 0, judgeResult.getCpuTime(), judgeResult.getMemory()
                ));
            } else {
                // Any other sandbox verdict is mapped to its judge result with score 0.
                commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
                        submissionId, caseNo, SandboxResult.of(judgeResult.getResult()).submissionJudgeResult.code,
                        0, judgeResult.getCpuTime(), judgeResult.getMemory()
                ));
            }
        } catch (SystemErrorException e) {
            log.warn("", e);
            // NOTE(review): judgeLog belongs to the enclosing instance and this method
            // runs on pool threads; the unsynchronised "+=" may lose updates when
            // several checkpoints fail at once. Verify against the enclosing class.
            judgeLog += e + "\n";
            commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
                    submissionId, caseNo, SubmissionJudgeResult.SE.code,
                    0, 0, 0
            ));
        } catch (Exception e) {
            log.warn("", e);
            throw e;
        }
        log.info("case {} finish", caseNo);
        return commandExecuteResult;
    }

    /**
     * Compares the answer file with the output file using {@code diff}, ignoring
     * changes in the amount of white space and blank lines.
     *
     * @return AC when the files match, WA when they differ
     * @throws SystemErrorException if the comparison itself fails (exit status
     *                              other than 0 or 1) or the command cannot be run
     */
    private SubmissionJudgeResult check() throws SystemErrorException {
        ProcessUtils.ProcessStatus processStatus = ProcessUtils.cmd(workspaceDir, "sudo", "diff", answerPath, outputPath, "--ignore-space-change", "--ignore-blank-lines");
        // diff exit status: 0 = identical, 1 = different, 2 = trouble (e.g. a missing
        // or unreadable file). "Trouble" is a judging failure, not a wrong answer.
        switch (processStatus.exitCode) {
            case 0:
                return SubmissionJudgeResult.AC;
            case 1:
                return SubmissionJudgeResult.WA;
            default:
                throw new SystemErrorException(String.format("diff exits abnormally: %d", processStatus.exitCode));
        }
    }
}

总结：线程套线程。先把command包装后传递给CommandExecutor，通过阻塞队列和线程池来进行任务的调度，使用异步的submit进行任务的提交，线程池负责任务的调度和cpu核心的使用。当任务开始执行的时候，调用SandboxRunner进行核心的绑定和程序参数的设定，拼装成cmd字符串传递给ProcessUtils.cmd，由它去执行对应的cmd命令：启动一个新的线程（被包装成Worker）去调用沙盒运行，Worker类里面封装了对应的返回方法。