目录
开始
checkpoint流程入口:
# Dispatcher.runJob(JobGraph jobGraph)
/**
 * Creates the JobManagerRunner for the given job, tracks its future by job id and starts it.
 *
 * <p>If creating or starting the runner fails, the tracked future is removed again; that
 * cleanup is executed on the main thread executor.
 *
 * @param jobGraph the job to run; its job id must not already be tracked
 * @return a future that completes once the runner has been started
 */
private CompletableFuture<Void> runJob(JobGraph jobGraph) {
    Preconditions.checkState(!jobManagerRunnerFutures.containsKey(jobGraph.getJobID()));

    final CompletableFuture<JobManagerRunner> runnerFuture = createJobManagerRunner(jobGraph);
    jobManagerRunnerFutures.put(jobGraph.getJobID(), runnerFuture);

    // start the runner once it exists and drop the result so callers only see completion
    final CompletableFuture<Void> startedFuture = runnerFuture
        .thenApply(FunctionUtils.uncheckedFunction(this::startJobManagerRunner))
        .thenApply(FunctionUtils.nullFn());

    return startedFuture.whenCompleteAsync(
        (unused, failure) -> {
            if (failure == null) {
                return;
            }
            // creation or start failed: forget the runner future again
            jobManagerRunnerFutures.remove(jobGraph.getJobID());
        },
        getMainThreadExecutor());
}
将JobGraph提交到Dispatcher后,会调用createJobManagerRunner和startJobManagerRunner。其中,
1、createJobManagerRunner阶段创建CheckpointCoordinator,并注册用于启动/停止checkpoint定时任务的job状态监听器。
2、startJobManagerRunner启动checkpoint定时任务。
执行图:
构建checkpoint
createJobManagerRunner阶段,会创建一个JobManagerRunner实例,在该过程中和checkpoint有关的是:会注册一个listener去监听job的状态。流程如下:
// Dispatcher类
/**
 * Asynchronously instantiates the JobManagerRunner for the given job on the RPC service's executor.
 *
 * @param jobGraph the job for which a runner is created
 * @return a future holding the runner; it completes exceptionally with a
 *     {@code JobExecutionException} (wrapped in a {@code CompletionException}) if the
 *     factory throws
 */
private CompletableFuture<JobManagerRunner> createJobManagerRunner(JobGraph jobGraph) {
    final RpcService service = getRpcService();

    return CompletableFuture.supplyAsync(
        () -> {
            try {
                final DefaultJobManagerJobMetricGroupFactory metricGroupFactory =
                    new DefaultJobManagerJobMetricGroupFactory(jobManagerMetricGroup);

                return jobManagerRunnerFactory.createJobManagerRunner(
                    jobGraph,
                    configuration,
                    service,
                    highAvailabilityServices,
                    heartbeatServices,
                    jobManagerSharedServices,
                    metricGroupFactory,
                    fatalErrorHandler);
            } catch (Exception cause) {
                // supplyAsync cannot propagate checked exceptions, hence the CompletionException wrapper
                final JobExecutionException failure = new JobExecutionException(
                    jobGraph.getJobID(),
                    "Could not instantiate JobManager.",
                    cause);
                throw new CompletionException(failure);
            }
        },
        service.getExecutor());
}
// 过程中会创建JobMaster,构建ExecutionGraph
// 过程JobManagerRunnerFactory.createJobManagerRunner -> new JobManagerRunnerImpl()-> DefaultJobMasterServiceFactory.createJobMasterService -> new JobMaster() -> createScheduler() -> DefaultSchedulerFactory.createInstance()-> new DefaultScheduler()-> createAndRestoreExecutionGraph -> createExecutionGraph() -> ExecutionGraphBuilder.buildGraph()
ExecutionGraphBuilder调用buildGraph()时,只要JobGraph中带有checkpoint配置(jobGraph.getCheckpointingSettings()不为null),就会调用ExecutionGraph.enableCheckpointing()方法;即使没有开启周期性checkpoint(checkpoint间隔为Long.MAX_VALUE),该方法也会被调用。
在enableCheckpointing()方法里会创建CheckpointCoordinator,这是负责checkpoint的核心实现类;同时,只有当checkpoint间隔不等于Long.MAX_VALUE(即开启了周期性checkpoint)时,才会给job注册监听器CheckpointCoordinatorDeActivator,它负责根据job状态启动和停止checkpoint的定时调度。
源码如下:
// ExecutionGraphBuilder类
/**
 * Builds the {@link ExecutionGraph} for a job, reusing {@code prior} when one is supplied, and
 * enables checkpointing on it when the job graph carries checkpointing settings.
 *
 * <p>This listing is an excerpt: the {@code ...} lines mark code that was omitted, so several
 * names used below (for example {@code jobInformation}, {@code triggerVertices},
 * {@code checkpointIdCounter}) are defined in the omitted parts.
 *
 * @param prior existing graph to reuse; when {@code null} a new graph is constructed
 * @param jobGraph the job description; must not be null
 * @return the reused or newly constructed execution graph
 * @throws JobException if constructing the ExecutionGraph fails with an IOException
 * @throws JobExecutionException declared by the signature; not raised in the visible excerpt
 */
public static ExecutionGraph buildGraph(
@Nullable ExecutionGraph prior,
JobGraph jobGraph,
Configuration jobManagerConfig,
ScheduledExecutorService futureExecutor,
Executor ioExecutor,
SlotProvider slotProvider,
ClassLoader classLoader,
CheckpointRecoveryFactory recoveryFactory,
Time rpcTimeout,
RestartStrategy restartStrategy,
MetricGroup metrics,
BlobWriter blobWriter,
Time allocationTimeout,
Logger log,
ShuffleMaster<?> shuffleMaster,
JobMasterPartitionTracker partitionTracker,
FailoverStrategy.Factory failoverStrategyFactory) throws JobExecutionException, JobException {
checkNotNull(jobGraph, "job graph cannot be null");
...
// create a new execution graph, if none exists so far
final ExecutionGraph executionGraph;
try {
executionGraph = (prior != null) ? prior :
new ExecutionGraph(
jobInformation,
futureExecutor,
ioExecutor,
rpcTimeout,
restartStrategy,
maxPriorAttemptsHistoryLength,
failoverStrategyFactory,
slotProvider,
classLoader,
blobWriter,
allocationTimeout,
partitionReleaseStrategyFactory,
shuffleMaster,
partitionTracker,
jobGraph.getScheduleMode());
} catch (IOException e) {
// surface construction failures as a JobException, keeping the original cause
throw new JobException("Could not create the ExecutionGraph.", e);
}
...
// configure the state checkpointing
JobCheckpointingSettings snapshotSettings = jobGraph.getCheckpointingSettings();
// checkpointing is only wired up when the job graph provides checkpointing settings
if (snapshotSettings != null) {
...
final CheckpointCoordinatorConfiguration chkConfig = snapshotSettings.getCheckpointCoordinatorConfiguration();
// hands the coordinator configuration, vertex lists, hooks, id counter, checkpoint store,
// state backend and stats tracker over to the execution graph
executionGraph.enableCheckpointing(
chkConfig,
triggerVertices,
ackVertices,
confirmVertices,
hooks,
checkpointIdCounter,
completedCheckpoints,
rootBackend,
checkpointStatsTracker);
}
...
return executionGraph;
}
在enableCheckpointing()方法中,主要是创建了处理checkpoint失败的CheckpointFailureManager、用于定时触发的"Checkpoint Timer"线程,以及checkpoint的核心类CheckpointCoordinator。
// ExecutionGraph类
/**
 * Sets up checkpointing for this execution graph.
 *
 * <p>In order, the method: verifies the job is still in CREATED state and that checkpointing has
 * not been enabled before, resolves the given job vertices into execution vertices, creates the
 * checkpoint failure manager, creates the single-threaded "Checkpoint Timer" executor, creates
 * the {@code CheckpointCoordinator}, registers the master hooks and the stats tracker on it,
 * and finally registers the coordinator's job status listener unless the configured checkpoint
 * interval is {@code Long.MAX_VALUE}.
 *
 * @param chkConfig coordinator configuration (interval, tolerable failure number, ...)
 * @param verticesToTrigger job vertices whose execution vertices are passed as tasks to trigger
 * @param verticesToWaitFor job vertices whose execution vertices are passed as tasks to wait for
 * @param verticesToCommitTo job vertices whose execution vertices are passed as tasks to commit to
 * @param masterHooks hooks to register on the coordinator; a rejected hook is only logged
 * @param checkpointIDCounter passed through to the coordinator
 * @param checkpointStore passed through to the coordinator
 * @param checkpointStateBackend passed through to the coordinator; its simple class name is
 *     recorded as the state backend name
 * @param statsTracker stats tracker; must not be null
 */
public void enableCheckpointing(
CheckpointCoordinatorConfiguration chkConfig,
List<ExecutionJobVertex> verticesToTrigger,
List<ExecutionJobVertex> verticesToWaitFor,
List<ExecutionJobVertex> verticesToCommitTo,
List<MasterTriggerRestoreHook<?>> masterHooks,
CheckpointIDCounter checkpointIDCounter,
CompletedCheckpointStore checkpointStore,
StateBackend checkpointStateBackend,
CheckpointStatsTracker statsTracker) {
// checkpointing may only be enabled once, and only before the job leaves CREATED
checkState(state == JobStatus.CREATED, "Job must be in CREATED state");
checkState(checkpointCoordinator == null, "checkpointing already enabled");
ExecutionVertex[] tasksToTrigger = collectExecutionVertices(verticesToTrigger);
ExecutionVertex[] tasksToWaitFor = collectExecutionVertices(verticesToWaitFor);
ExecutionVertex[] tasksToCommitTo = collectExecutionVertices(verticesToCommitTo);
final Collection<OperatorCoordinatorCheckpointContext> operatorCoordinators = buildOpCoordinatorCheckpointContexts();
checkpointStatsTracker = checkNotNull(statsTracker, "CheckpointStatsTracker");
// both callbacks hand the failure over to the job master main thread executor
CheckpointFailureManager failureManager = new CheckpointFailureManager(
chkConfig.getTolerableCheckpointFailureNumber(),
new CheckpointFailureManager.FailJobCallback() {
@Override
public void failJob(Throwable cause) {
getJobMasterMainThreadExecutor().execute(() -> failGlobal(cause));
}
@Override
public void failJobDueToTaskFailure(Throwable cause, ExecutionAttemptID failingTask) {
getJobMasterMainThreadExecutor().execute(() -> failGlobalIfExecutionIsStillRunning(cause, failingTask));
}
}
);
// dedicated single-threaded scheduled executor, handed to the coordinator below
checkState(checkpointCoordinatorTimer == null);
checkpointCoordinatorTimer = Executors.newSingleThreadScheduledExecutor(
new DispatcherThreadFactory(
Thread.currentThread().getThreadGroup(), "Checkpoint Timer"));
// create the coordinator that triggers and commits checkpoints and holds the state
checkpointCoordinator = new CheckpointCoordinator(
jobInformation.getJobId(),
chkConfig,
tasksToTrigger,
tasksToWaitFor,
tasksToCommitTo,
operatorCoordinators,
checkpointIDCounter,
checkpointStore,
checkpointStateBackend,
ioExecutor,
new ScheduledExecutorServiceAdapter(checkpointCoordinatorTimer),
SharedStateRegistry.DEFAULT_FACTORY,
failureManager);
// register the master hooks on the checkpoint coordinator
for (MasterTriggerRestoreHook<?> hook : masterHooks) {
if (!checkpointCoordinator.addMasterHook(hook)) {
LOG.warn("Trying to register multiple checkpoint hooks with the name: {}", hook.getIdentifier());
}
}
checkpointCoordinator.setCheckpointStatsTracker(checkpointStatsTracker);
// if periodic checkpointing is not configured, this checkpoint listener is not registered
// interval of max long value indicates disable periodic checkpoint,
// the CheckpointActivatorDeactivator should be created only if the interval is not max value
if (chkConfig.getCheckpointInterval() != Long.MAX_VALUE) {
// the periodic checkpoint scheduler is activated and deactivated as a result of
// job status changes (running -> on, all other states -> off)
registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());
}
this.stateBackendName = checkpointStateBackend.getClass().getSimpleName();
}
createActivatorDeactivator()会创建一个job状态监听器CheckpointCoordinatorDeActivator(已创建则直接复用),随后通过registerJobStatusListener()将其注册到jobStatusListeners列表中,在job状态发生改变时会进行相应的操作。
// CheckpointCoordinator类
/**
 * Returns the job status listener bound to this coordinator, creating it on first use.
 *
 * @return the (cached) {@code CheckpointCoordinatorDeActivator} for this coordinator
 * @throws IllegalArgumentException if the coordinator has already been shut down
 */
public JobStatusListener createActivatorDeactivator() {
    synchronized (lock) {
        if (shutdown) {
            throw new IllegalArgumentException("Checkpoint coordinator is shut down");
        }

        // reuse the listener if one was created by an earlier call
        if (jobStatusListener != null) {
            return jobStatusListener;
        }

        jobStatusListener = new CheckpointCoordinatorDeActivator(this);
        return jobStatusListener;
    }
}
// ExecutionGraph类
// 添加到列表中
/**
 * Adds the given listener to the list of job status listeners.
 *
 * @param listener the listener to add; {@code null} is silently ignored
 */
public void registerJobStatusListener(JobStatusListener listener) {
    if (listener == null) {
        return;
    }
    jobStatusListeners.add(listener);
}
JobManager启动checkpoint调度
在JobManager端开始进行任务调度的时候,会对job的状态进行转换,由CREATED转成RUNNING,实现在transitionState()方法中,在这个过程中刚才设置的job监听器CheckpointCoordinatorDeActivator就开始启动checkpoint的定时任务了。调用过程从Dispatcher.startJobManagerRunner开始,
JobManagerRunnerImpl.start() -> StandaloneLeaderElectionService.start() -> JobManagerRunnerImpl.grantLeadership() -> verifyJobSchedulingStatusAndStartJobManager() -> startJobMaster() -> JobMaster.start() -> startJobExecution() -> resetAndStartScheduler() -> startScheduling() -> SchedulerBase.startScheduling() -> DefaultScheduler.startSchedulingInternal() -> SchedulerBase.prepareExecutionGraphForNgScheduling() -> ExecutionGraph.transitionToRunning() -> transitionState() -> notifyJobStatusChange() -> CheckpointCoordinatorDeActivator.jobStatusChanges() -> CheckpointCoordinator.startCheckpointScheduler()
源码如下:
// ExecutionGraph类
/**
 * Moves the job from CREATED to RUNNING.
 *
 * @throws IllegalStateException if the job is not currently in the CREATED state
 */
public void transitionToRunning() {
    final boolean switched = transitionState(JobStatus.CREATED, JobStatus.RUNNING);
    if (switched) {
        return;
    }
    throw new IllegalStateException("Job may only be scheduled from state " + JobStatus.CREATED);
}
/**
 * Attempts the state transition {@code current -> newState} without an associated error.
 *
 * @param current the state the job is expected to be in
 * @param newState the state to switch to
 * @return {@code true} if the transition happened, {@code false} otherwise
 */
public boolean transitionState(JobStatus current, JobStatus newState) {
    final Throwable noError = null;
    return transitionState(current, newState, noError);
}
/**
 * Switches the job from {@code current} to {@code newState} if it is currently in
 * {@code current}, records the transition timestamp and notifies the job status listeners.
 *
 * @param current the state the job is expected to be in; must not be a terminal state
 * @param newState the state to switch to
 * @param error the error associated with the transition, or {@code null}
 * @return {@code true} if the job was in {@code current} and has been switched,
 *     {@code false} if the job was in a different state
 * @throws IllegalStateException if {@code current} is a terminal state
 */
private boolean transitionState(JobStatus current, JobStatus newState, Throwable error) {
    assertRunningInJobMasterMainThread();

    // consistency check: a terminal state must never be left again
    if (current.isTerminalState()) {
        final String terminalStateMessage = "Job is trying to leave terminal state " + current;
        LOG.error(terminalStateMessage);
        throw new IllegalStateException(terminalStateMessage);
    }

    // the job is not in the expected state, so nothing is changed
    if (state != current) {
        return false;
    }

    // perform the actual state transition
    state = newState;
    LOG.info("Job {} ({}) switched from state {} to {}.", getJobName(), getJobID(), current, newState, error);

    stateTimestamps[newState.ordinal()] = System.currentTimeMillis();
    notifyJobStatusChange(newState, error);
    return true;
}
/**
 * Informs every registered job status listener about the new job state.
 *
 * <p>A listener that throws is logged and does not prevent the remaining listeners from being
 * notified.
 *
 * @param newState the state the job has switched to
 * @param error the error associated with the transition, or {@code null}; it is passed to the
 *     listeners wrapped in a {@code SerializedThrowable}
 */
private void notifyJobStatusChange(JobStatus newState, Throwable error) {
    if (jobStatusListeners.size() == 0) {
        return;
    }

    final long notificationTime = System.currentTimeMillis();

    final Throwable wrappedError;
    if (error != null) {
        wrappedError = new SerializedThrowable(error);
    } else {
        wrappedError = null;
    }

    for (JobStatusListener statusListener : jobStatusListeners) {
        try {
            statusListener.jobStatusChanges(getJobID(), newState, notificationTime, wrappedError);
        } catch (Throwable failure) {
            LOG.warn("Error while notifying JobStatusListener", failure);
        }
    }
}
// CheckpointCoordinatorDeActivator类
/**
 * Starts the checkpoint scheduler when the job switches to RUNNING and stops it for every
 * other job status.
 *
 * @param jobId id of the job whose status changed (not used here)
 * @param newJobStatus the status the job switched to
 * @param timestamp time of the status change (not used here)
 * @param error error associated with the status change (not used here)
 */
public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
    if (newJobStatus != JobStatus.RUNNING) {
        // anything other than RUNNING should stop the trigger for now
        coordinator.stopCheckpointScheduler();
        return;
    }

    // the job is running: start the checkpoint scheduler
    coordinator.startCheckpointScheduler();
}
CheckpointCoordinator会通过timer.scheduleAtFixedRate()部署一个定时任务,用于周期性地触发checkpoint,这个定时任务就是CheckpointCoordinator的内部类ScheduledTrigger。
// CheckpointCoordinator类
/**
 * (Re)starts periodic checkpoint triggering: cancels any previously scheduled trigger and
 * schedules a new one with a random initial delay.
 *
 * @throws IllegalArgumentException if the coordinator has already been shut down
 */
public void startCheckpointScheduler() {
    synchronized (lock) {
        if (shutdown) {
            throw new IllegalArgumentException("Checkpoint coordinator is shut down");
        }

        // cancel whatever was scheduled before so that only one trigger is active
        stopCheckpointScheduler();

        periodicScheduling = true;
        final long initialDelay = getRandomInitDelay();
        currentPeriodicTrigger = scheduleTriggerWithDelay(initialDelay);
    }
}
/**
 * Schedules a new {@code ScheduledTrigger} on the timer at a fixed rate of
 * {@code baseInterval} milliseconds.
 *
 * @param initDelay delay in milliseconds before the first run
 * @return the future returned by the timer for the scheduled trigger
 */
private ScheduledFuture<?> scheduleTriggerWithDelay(long initDelay) {
    final ScheduledTrigger periodicTrigger = new ScheduledTrigger();
    return timer.scheduleAtFixedRate(periodicTrigger, initDelay, baseInterval, TimeUnit.MILLISECONDS);
}
private final class ScheduledTrigger implements Runnable {
@Override
public void run() {
try {
triggerCheckpoint(true);
}
catch (Exception e) {
LOG.error("Exception while triggering checkpoint for job {}.", job, e);
}
}
}
ScheduledTrigger定时触发checkpoint,触发过程中:
1、在触发checkpoint之前先做一遍检查,检查当前正在处理的checkpoint是否超过设置的最大并发checkpoint数量,检查checkpoint的间隔是否达到设置的两次checkpoint的时间间隔,在都没有问题的情况下才可以触发checkpoint
2、获取所有需要触发checkpoint的execution(tasksToTrigger),即所有的source task,过程中检查这些task是否都正常运行。
3、获取所有的ackTasks,这里就是所有运行中的task,过程中检查这些需要确认checkpoint信息的task是否都正常运行。所有的task最终都需要向JobManager发送确认自己checkpoint的信息。
4、正式开始触发checkpoint,创建一个PendingCheckpoint,包含了checkpointID和timestamp,向所有的source task去触发checkpoint
//CheckpointCoordinator类
/**
 * Submits a checkpoint or savepoint trigger request.
 *
 * <p>The request is handed to the request decider; if the decider selects a request to
 * execute, triggering of that request is started right away.
 *
 * @param props properties of the checkpoint to trigger
 * @param externalSavepointLocation target location for a savepoint, may be {@code null}
 * @param isPeriodic whether the request originates from the periodic trigger
 * @param advanceToEndOfTime whether the watermark should be advanced to MAX; only permitted
 *     for synchronous savepoints
 * @return the completion promise of the submitted request, or an exceptionally completed
 *     future if {@code advanceToEndOfTime} is set for anything but a synchronous savepoint
 */
public CompletableFuture<CompletedCheckpoint> triggerCheckpoint(
        CheckpointProperties props,
        @Nullable String externalSavepointLocation,
        boolean isPeriodic,
        boolean advanceToEndOfTime) {

    // advancing to the end of time is rejected unless this is a synchronous savepoint
    if (advanceToEndOfTime && (!props.isSynchronous() || !props.isSavepoint())) {
        final IllegalArgumentException invalidRequest = new IllegalArgumentException(
            "Only synchronous savepoints are allowed to advance the watermark to MAX.");
        return FutureUtils.completedExceptionally(invalidRequest);
    }

    final CheckpointTriggerRequest triggerRequest = new CheckpointTriggerRequest(
        props,
        externalSavepointLocation,
        isPeriodic,
        advanceToEndOfTime);

    // the decider returns the request (if any) that should be executed now
    requestDecider
        .chooseRequestToExecute(triggerRequest, isTriggering, lastCheckpointCompletionRelativeTime)
        .ifPresent(this::startTriggeringCheckpoint);

    return triggerRequest.onCompletionPromise;
}
// 触发前的检查
Optional<CheckpointTriggerRequest> chooseRequestToExecute(CheckpointTriggerRequest newRequest, boolean isTriggering, long lastCompletionMs) {
synchronized (lock) {
if (queuedRequests.size() >= maxQueuedRequests && !queuedRequests.last().isPeriodic) {
// there are only non-periodic (ie user-submitted) requests enqueued - r