熟悉代码
腾哥给了些任务
通读完judger的核心代码,跟黄学长联系修改完OLE,就开始尝试研究命令行启动的线程模型优化。(因为judger的代码在github上是公开的,so我这里就直接贴腾哥的代码啦)
@author:张腾学长
/**
 * Runs {@link Command}s on a fixed-size thread pool and lends each running command one CPU core
 * number from a shared pool of cores.
 *
 * <p>The thread pool has as many threads as there are core numbers in {@code CpuConfig.getCpuSet()}.
 * Because the rejection policy is {@code CallerRunsPolicy}, a command may also run on the
 * submitting thread once the work queue (capacity 1024) is full.
 */
public class CommandExecutor {

    /**
     * Core numbers that are currently free. A command removes one from the head before it runs and
     * appends it to the tail when it finishes.
     */
    private final LinkedBlockingDeque<Integer> cpuPool;

    /** Thread pool wrapper that hands results back in completion order. */
    private final CompletionService<CommandExecuteResult> threadPool;

    /**
     * Creates the core pool from {@code CpuConfig.getCpuSet()} and a thread pool with one thread
     * per core number.
     */
    public CommandExecutor() {
        cpuPool = new LinkedBlockingDeque<>(CpuConfig.getCpuSet());
        threadPool = new ExecutorCompletionService<>(new ThreadPoolExecutor(
                cpuPool.size(),
                cpuPool.size(),
                0,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(1024),
                new ThreadPoolExecutor.CallerRunsPolicy())
        );
        log.info("init threadPool {}", cpuPool.size());
    }

    /**
     * Submits a command for asynchronous execution.
     *
     * @param command the command to run
     */
    public void submit(Command command) {
        threadPool.submit(new CommandThread(command, cpuPool));
    }

    /**
     * Waits for the next finished command and returns its result. Results arrive in completion
     * order, not submission order.
     *
     * @return the result of the command that finished next
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException if the command itself threw
     */
    public CommandExecuteResult take() throws InterruptedException, ExecutionException {
        return threadPool.take().get();
    }

    /** Task wrapper that borrows a core number for the duration of one command. */
    private static class CommandThread implements Callable<CommandExecuteResult> {

        private final Command command;
        private final LinkedBlockingDeque<Integer> cpuPool;

        public CommandThread(Command command, LinkedBlockingDeque<Integer> cpuPool) {
            this.command = command;
            this.cpuPool = cpuPool;
        }

        /**
         * Borrows a core number, runs the command bound to it and always returns the core number
         * to the pool afterwards.
         */
        @Override
        public CommandExecuteResult call() throws Exception {
            log.info("exec {}", command.toString());
            Integer coreNo = null;
            try {
                // Block until a core is free. A non-blocking poll() returns null when every core
                // is taken (possible when CallerRunsPolicy runs this task on the submitting
                // thread), and falling back to a fixed core would put two commands on one core.
                coreNo = cpuPool.take();
                log.info("cpu consume {}", coreNo);
                return command.run(coreNo);
            } finally {
                // coreNo is still null only if take() was interrupted; nothing to give back then.
                if (coreNo != null) {
                    log.info("cpu release {}", coreNo);
                    cpuPool.offer(coreNo);
                }
            }
        }
    }
}
cpuPool 是 CPU 核心池,通过读取本机的 CPU 配置文件获得,里面是一些离散的核心编号,所以用双端队列储存:从队头取,运行结束后放回到队列尾部。
@author:张腾学长
/**
 * Builds the {@code sudo taskset -c <core> /usr/bin/sandbox ...} command line, runs it through
 * {@code ProcessUtils.cmd} and parses the sandbox's output as JSON.
 */
public class SandboxRunner {

    private static final String SANDBOX_PATH = "/usr/bin/sandbox";

    /**
     * Runs the sandbox on the first core number returned by {@code CpuConfig.getCpuSet()}.
     *
     * @param cwd  working directory passed on to {@code ProcessUtils.cmd}
     * @param args sandbox options; null entries and entries with a null value are skipped
     * @return the parsed sandbox result
     * @throws SystemErrorException if the sandbox exits with a non-zero code or yields no result
     */
    public static SandboxResultDTO run(String cwd, Argument... args) throws SystemErrorException {
        return run(CpuConfig.getCpuSet().iterator().next(), cwd, args);
    }

    /**
     * Runs the sandbox bound to the given core via {@code taskset -c}.
     *
     * @param coreNo core number handed to {@code taskset -c}
     * @param cwd    working directory passed on to {@code ProcessUtils.cmd}
     * @param args   sandbox options; null entries and entries with a null value are skipped
     * @return the parsed sandbox result, never null
     * @throws SystemErrorException if the sandbox exits with a non-zero code or yields no result
     */
    public static SandboxResultDTO run(int coreNo, String cwd, Argument... args) throws SystemErrorException {
        List<String> commandList = new ArrayList<>();
        commandList.add("sudo");
        commandList.add("taskset");
        commandList.add("-c");
        commandList.add(String.valueOf(coreNo));
        commandList.add(SANDBOX_PATH);

        for (Argument arg : args) {
            if (arg == null || arg.value == null) {
                continue;
            }
            if (arg.key.clz == String.class) {
                commandList.add(quotedOption(arg.key, arg.value));
            } else if (arg.key.clz == String[].class) {
                // Array-valued options are repeated, one --key="value" per element.
                String[] values = (String[]) arg.key.clz.cast(arg.value);
                for (String value : values) {
                    commandList.add(quotedOption(arg.key, value));
                }
            } else {
                commandList.add(String.format("--%s=%s", arg.key, arg.value));
            }
        }

        ProcessUtils.ProcessStatus processStatus = ProcessUtils.cmd(cwd, commandList.toArray(new String[0]));
        if (processStatus.exitCode != 0) {
            // Keep whatever the sandbox printed; it is the only clue to why it failed.
            log.warn("sandbox exits abnormally: {}, output: {}", processStatus.exitCode, processStatus.output);
            throw new SystemErrorException(String.format("Sandbox exits abnormally: %d", processStatus.exitCode));
        }
        log.info("\nsandbox output: {}", processStatus.output);

        SandboxResultDTO result = JSON.parseObject(processStatus.output, SandboxResultDTO.class);
        if (result == null) {
            // Callers dereference the result right away; fail with the declared exception instead.
            throw new SystemErrorException("Sandbox produced no result");
        }
        return result;
    }

    /**
     * Formats one string-valued option as {@code --key="value"}.
     *
     * <p>NOTE(review): the value is wrapped in double quotes but not escaped. This relies on the
     * command line being interpreted by a shell downstream, so a value containing {@code "},
     * {@code $} or a backquote would be interpreted rather than passed literally. Confirm that
     * every value is trusted.
     */
    private static String quotedOption(Object key, Object value) {
        return String.format("--%s=\"%s\"", key, value);
    }
}
这边是通过命令行启动 taskset,把沙盒进程绑定到对应的运行核心;cwd 是沙盒进程的工作目录(调用处传入的是 workspaceDir),题目的时间、内存等限制则通过 args 传入。
/**
 * Runs a command line through {@code /bin/sh -c} and waits up to 120 seconds for it to finish.
 *
 * <p>The arguments are joined with single spaces and interpreted by the shell, so any quoting
 * must already be present in {@code commands}. Standard error is merged into standard output.
 *
 * @param pwd      working directory for the process; null or blank keeps the current one
 * @param commands the words of the command line
 * @return the status object filled in by the worker that watches the process
 * @throws SystemErrorException if the process cannot be started, does not finish in time, or the
 *                              calling thread is interrupted while waiting
 */
public static ProcessStatus cmd(String pwd, final String... commands) throws SystemErrorException {
    log.info("Run CommandLine\npwd: {}\ncommand: {}\n", pwd, String.join(" ", commands));
    Process process = null;
    Worker worker = null;
    try {
        process = new ProcessBuilder("/bin/sh", "-c", String.join(" ", commands))
                .directory(Optional.ofNullable(pwd).filter(StringUtils::isNotBlank).map(File::new).orElse(null))
                .redirectErrorStream(true)
                .start();
        worker = new Worker(process);
        worker.start();
        ProcessStatus ps = worker.getProcessStatus();
        worker.join(120000); // run for at most 120s
        if (ps.exitCode == ProcessStatus.CODE_STARTED) {
            // The exit code was never updated, so the process has not finished.
            worker.interrupt();
            throw new SystemErrorException("Timeout");
        }
        return ps;
    } catch (InterruptedException e) {
        // Canceled by another thread while waiting for the worker.
        if (worker != null) {
            worker.interrupt();
        }
        // Restore the flag so code further up the stack can still see the interruption.
        Thread.currentThread().interrupt();
        throw new SystemErrorException("Canceled by other thread");
    } catch (IOException e) {
        // start() failed, so there may be neither a process nor a worker to clean up.
        log.warn("Failed to start process", e);
        if (worker != null) {
            worker.interrupt();
        }
        throw new SystemErrorException("Failed to start process: " + e.getMessage());
    } finally {
        // process is still null when start() itself threw.
        if (process != null) {
            process.destroy();
        }
    }
}
还是没太明白是怎么多核处理的,这不就只绑定了一个核心吗?明天研究一下内存分配。
好了搞清楚内部的运行规律了。
首先提交IOJudgeCommand类的任务给commandExecutor
// Submit one judge task per checkpoint to the command executor.
for (int idx = 0, checkpointCount = checkpoints.size(); idx < checkpointCount; idx++) {
    String cpId = String.valueOf(checkpoints.get(idx).getCheckpointId());
    // Input and expected answer live in the data directory; the user's output goes to userOutputDir.
    String cpInput = Paths.get(PathConfig.DATA_DIR, cpId + ".in").toString();
    String cpAnswer = Paths.get(PathConfig.DATA_DIR, cpId + ".ans").toString();
    String cpOutput = Paths.get(userOutputDir, cpId + ".out").toString();
    Integer cpScore = checkpoints.get(idx).getCheckpointScore();
    // IOJudgeCommand implements Command; its run method is what the executor ends up calling.
    IOJudgeCommand judgeTask = new IOJudgeCommand(submissionId, idx, cpScore, timeLimit, memoryLimit,
            outputLimit, cpInput, cpOutput, cpAnswer, runConfig);
    commandExecutor.submit(judgeTask);
}
而CommandExecutor 内部有一个阻塞队列和线程池
通过线程池进行任务的提交和返回,提交的是CommandThread类,内部执行call函数运行command的run方法,
/**
 * Runs {@link Command}s on a fixed-size thread pool and lends each running command one CPU core
 * number from a shared pool of cores.
 *
 * <p>The thread pool has as many threads as there are core numbers in {@code CpuConfig.getCpuSet()}.
 * Because the rejection policy is {@code CallerRunsPolicy}, a command may also run on the
 * submitting thread once the work queue (capacity 1024) is full.
 */
@Slf4j
@Component
public class CommandExecutor {

    /**
     * Core numbers that are currently free, held in a blocking deque. A command removes one from
     * the head before it runs and appends it to the tail when it finishes.
     */
    private final LinkedBlockingDeque<Integer> cpuPool;

    /** Thread pool wrapper that hands results back in completion order. */
    private final CompletionService<CommandExecuteResult> threadPool;

    /**
     * Creates the core pool from {@code CpuConfig.getCpuSet()} and a thread pool with one thread
     * per core number.
     */
    public CommandExecutor() {
        cpuPool = new LinkedBlockingDeque<>(CpuConfig.getCpuSet());
        threadPool = new ExecutorCompletionService<>(new ThreadPoolExecutor(
                cpuPool.size(),
                cpuPool.size(),
                0,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(1024),
                new ThreadPoolExecutor.CallerRunsPolicy())
        );
        log.info("init threadPool {}", cpuPool.size());
    }

    /**
     * Submits a command for asynchronous execution. The command is wrapped in a
     * {@code CommandThread} that shares this executor's core pool.
     *
     * @param command the command to run
     */
    public void submit(Command command) {
        threadPool.submit(new CommandThread(command, cpuPool));
    }

    /**
     * Waits for the next finished command and returns its result. Results arrive in completion
     * order, not submission order.
     *
     * @return the result of the command that finished next
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException if the command itself threw
     */
    public CommandExecuteResult take() throws InterruptedException, ExecutionException {
        return threadPool.take().get();
    }

    /** Task wrapper that borrows a core number for the duration of one command. */
    private static class CommandThread implements Callable<CommandExecuteResult> {

        private final Command command;
        private final LinkedBlockingDeque<Integer> cpuPool;

        public CommandThread(Command command, LinkedBlockingDeque<Integer> cpuPool) {
            this.command = command;
            this.cpuPool = cpuPool;
        }

        /**
         * Borrows a core number, runs the command bound to it and always returns the core number
         * to the pool afterwards. The thread pool calls this method, which in turn calls the
         * wrapped command's {@code run}; that is where the real work happens.
         */
        @Override
        public CommandExecuteResult call() throws Exception {
            log.info("exec {}", command.toString());
            Integer coreNo = null;
            try {
                // Block until a core is free. A non-blocking poll() returns null when every core
                // is taken (possible when CallerRunsPolicy runs this task on the submitting
                // thread), and falling back to a fixed core would put two commands on one core.
                coreNo = cpuPool.take();
                log.info("cpu consume {}", coreNo);
                return command.run(coreNo);
            } finally {
                // coreNo is still null only if take() was interrupted; nothing to give back then.
                if (coreNo != null) {
                    log.info("cpu release {}", coreNo);
                    cpuPool.offer(coreNo);
                }
            }
        }
    }
}
而我们主要的cmd操作都在command的run 方法之中,
private class IOJudgeCommand implements Command {
private final long submissionId;
private final int caseNo;
private final int score;
private final String outputPath;
private final String answerPath;
private final Argument[] runCommand;
IOJudgeCommand(long submissionId, int caseNo, int score, int timeLimit, int memoryLimit, int outputLimit,String inputPath,
String outputPath, String answerPath, JudgeTemplateConfigDTO.TemplateConfig.Run runConfig) throws SystemErrorException {
this.submissionId = submissionId;
this.caseNo = caseNo;
this.score = score;
this.outputPath = outputPath;
this.answerPath = answerPath;
String[] _commands = IOSubmissionHandler.WHITESPACE_PATTERN.split(runConfig.getCommand().trim());
runCommand = ArrayUtils.toArray(
new Argument(SandboxArgument.MAX_CPU_TIME, timeLimit * runConfig.getMaxCpuTimeFactor()),
new Argument(SandboxArgument.MAX_REAL_TIME, timeLimit * runConfig.getMaxRealTimeFactor()),
new Argument(SandboxArgument.MAX_MEMORY, memoryLimit * runConfig.getMaxMemoryFactor() * 1024L),
new Argument(SandboxArgument.MAX_OUTPUT_SIZE, outputLimit * runConfig.getMaxMemoryFactor() * 1024L),
new Argument(SandboxArgument.MAX_STACK, 128L * 1024 * 1024),
new Argument(SandboxArgument.EXE_PATH, _commands[0]),
new Argument(SandboxArgument.EXE_ARGS, Arrays.copyOfRange(_commands, 1, _commands.length)),
new Argument(SandboxArgument.EXE_ENVS, runConfig.getEnvs()),
new Argument(SandboxArgument.INPUT_PATH, inputPath),
new Argument(SandboxArgument.OUTPUT_PATH, outputPath),
new Argument(SandboxArgument.UID, PathConfig.NOBODY_UID),
new Argument(SandboxArgument.GID, PathConfig.NOBODY_GID)
);
}
@Override//这个run是包装command的内部方法,使用CommandThread的call方法去使用,而CommandThread受线程池的调度
public CommandExecuteResult<CheckpointResultMessageDTO> run(int coreNo) {
CommandExecuteResult<CheckpointResultMessageDTO> commandExecuteResult = null;
try {
SandboxResultDTO judgeResult = SandboxRunner.run(coreNo, workspaceDir, runCommand);//运行沙盒获得运行结果
if (SandboxResult.SYSTEM_ERROR.equals(judgeResult.getResult())) {
throw new SystemErrorException(String.format("Sandbox Internal Error #%d, signal #%d", judgeResult.getError(), judgeResult.getSignal()));
} else if (SandboxResult.SUCCESS.equals(judgeResult.getResult())) {
SubmissionJudgeResult result = check();//这里的ac是指程序在限制下成功运行了,下面进行文件比对,判断是AC还是WA
commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
submissionId, caseNo, result.code,
SubmissionJudgeResult.AC.code == result.code ? score : 0, judgeResult.getCpuTime(), judgeResult.getMemory()
));
} else {
commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
submissionId, caseNo, SandboxResult.of(judgeResult.getResult()).submissionJudgeResult.code,
0, judgeResult.getCpuTime(), judgeResult.getMemory()
));
}
} catch (SystemErrorException e) {
log.warn("", e);
judgeLog += e + "\n";
commandExecuteResult = new CommandExecuteResult<>(new CheckpointResultMessageDTO(
submissionId, caseNo, SubmissionJudgeResult.SE.code,
0, 0, 0
));
} catch (Exception e) {
log.warn("", e);
throw e;
}
log.info("case {} finish", caseNo);
return commandExecuteResult;
}
private SubmissionJudgeResult check() throws SystemErrorException {
ProcessUtils.ProcessStatus processStatus = ProcessUtils.cmd(workspaceDir, "sudo", "diff", answerPath, outputPath, "--ignore-space-change", "--ignore-blank-lines");
return processStatus.exitCode == 0 ? SubmissionJudgeResult.AC : SubmissionJudgeResult.WA;
}
}
总结:线程套线程。先把command包装后传递给CommandExecutor,通过阻塞队列和线程池来进行任务的调度,使用异步的submit进行任务的提交,由线程池负责任务的调度和cpu核心的分配;当任务开始执行时,调用SandboxRunner完成核心的绑定和程序参数的设定,拼装成cmd字符串传递给ProcessUtils.cmd,由它去执行对应的cmd命令:启动一个新的线程(被包装成Worker)去调用沙盒运行,Worker类里面封装了对应的返回方法。
优化
尝试把第二层套的线程给去掉。