文章目录
周期触发
检查点协调器,协调operator和state的分布式快照,触发机制基于定时器的周期性触发。
//CheckpointCoordinator
// Checkpoints that have been triggered but not yet completed, keyed by checkpoint ID.
private final Map<Long, PendingCheckpoint> pendingCheckpoints;
// Tasks on which a checkpoint has to be triggered (per the original note, this only contains sources).
private final ExecutionVertex[] tasksToTrigger;
// Tasks whose checkpoint acknowledgement has to be awaited.
private final ExecutionVertex[] tasksToWaitFor;
// Future of the current periodic trigger; tells whether the periodic task is scheduled and allows cancelling it.
private ScheduledFuture<?> currentPeriodicTrigger;
// Minimum pause between two checkpoints, in nanoseconds.
private final long minPauseBetweenCheckpointsNanos;
// Time at which the last checkpoint completed (a System.nanoTime() reading).
private long lastCheckpointCompletionNanos;
// Upper bound on the number of checkpoints that may be in progress concurrently.
private final int maxConcurrentCheckpointAttempts;
// Whether a checkpoint trigger request could not be executed immediately and is queued.
private boolean triggerRequestQueued;
// Whether the coordinator has been shut down.
private volatile boolean shutdown;
//CheckpointCoordinator
// Starts periodic checkpoint scheduling: stops any existing schedule, enables periodic
// scheduling, and registers a ScheduledTrigger on the timer at a fixed rate.
public void startCheckpointScheduler() {
synchronized (lock) {
...
// stop the checkpoint scheduler first
stopCheckpointScheduler();
// set periodicScheduling to true
periodicScheduling = true;
// schedule the periodic trigger; initial delay and period are both baseInterval (milliseconds)
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(),
baseInterval, baseInterval, TimeUnit.MILLISECONDS);
}
}
// Runnable handed to the timer by startCheckpointScheduler; each run triggers a checkpoint.
private final class ScheduledTrigger implements Runnable {
@Override
public void run() {
// ... exception handling elided
// trigger a checkpoint stamped with the current wall-clock time
// (the boolean is presumably the isPeriodic flag; this overload is not shown in the excerpt)
triggerCheckpoint(System.currentTimeMillis(), true);
...
}
}
下面我们来看看triggerCheckpoint方法:
1.检查coordinator自身状态,如果shutdown,返回COORDINATOR_SHUTDOWN错误
2.检查与上次checkpoint的时间间隔
3.检查当前的并发checkpoint数是否超过限制
4.以上3点检查通过以后,确认即将触发检查点的所有Task和需要响应checkpoint的ACK(ack涉及到checkpoint的两阶段提交,后面会讲)的task的状态都是Running状态则触发检查点,否则返回NOT_ALL_REQUIRED_TASKS_RUNNING错误
//CheckpointCoordinator
// Triggers a new checkpoint (or savepoint, depending on props).
// Flow visible in this excerpt: pre-checks under `lock` -> verify the involved tasks ->
// allocate the checkpoint ID and storage location -> create the PendingCheckpoint and its
// timeout canceller -> re-check and register it under `lock` -> send the trigger to the tasks.
// Most early-return statements were elided by the author and are only indicated by comments.
public CheckpointTriggerResult triggerCheckpoint(
long timestamp,
CheckpointProperties props,
@Nullable String externalSavepointLocation,
boolean isPeriodic) {
synchronized (lock) {
// ... sanity checks elided
// if already shut down, return the COORDINATOR_SHUTDOWN error
// if isPeriodic is true while periodicScheduling is false, return the PERIODIC_SCHEDULER_SHUTDOWN error
if (!props.forceCheckpoint()) {
// a trigger request is already queued and cannot be executed immediately: return the ALREADY_QUEUED error
if (triggerRequestQueued) {
// return ALREADY_QUEUED
}
// too many checkpoints in progress (pending count >= maxConcurrentCheckpointAttempts): mark the request as queued, cancel the periodic trigger if one is scheduled, and return the TOO_MANY_CONCURRENT_CHECKPOINTS error
if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
triggerRequestQueued = true;
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
// return TOO_MANY_CONCURRENT_CHECKPOINTS
}
// the minimum pause since the last completed checkpoint has not elapsed yet: cancel the periodic trigger if one is scheduled, reschedule it so that it first fires after the remaining pause (then every baseInterval), and return the MINIMUM_TIME_BETWEEN_CHECKPOINTS error
final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
final long durationTillNextMillis = (earliestNext - System.nanoTime()) / 1_000_000;
if (durationTillNextMillis > 0) {
if (currentPeriodicTrigger != null) {
currentPeriodicTrigger.cancel(false);
currentPeriodicTrigger = null;
}
currentPeriodicTrigger = timer.scheduleAtFixedRate(
new ScheduledTrigger(),
durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
// return the MINIMUM_TIME_BETWEEN_CHECKPOINTS error
}
}
}
// check the state of every task the checkpoint must be triggered on; if at least one is not RUNNING, do not trigger and return the NOT_ALL_REQUIRED_TASKS_RUNNING error immediately
Execution[] executions = new Execution[tasksToTrigger.length];
for (int i = 0; i < tasksToTrigger.length; i++) {
Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
if (ee != null && ee.getState() == ExecutionState.RUNNING) {
executions[i] = ee;
} else {
// return NOT_ALL_REQUIRED_TASKS_RUNNING
}
}
// collect the tasks that must acknowledge the checkpoint; the code shown only requires a current execution attempt to exist (ee != null), otherwise it returns the NOT_ALL_REQUIRED_TASKS_RUNNING error immediately
Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);
for (ExecutionVertex ev : tasksToWaitFor) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
ackTasks.put(ee.getAttemptId(), ev);
} else {
// return NOT_ALL_REQUIRED_TASKS_RUNNING
}
}
synchronized (triggerLock) {
final CheckpointStorageLocation checkpointStorageLocation;
final long checkpointID;
try {
// obtain the checkpoint ID and advance the counter
checkpointID = checkpointIdCounter.getAndIncrement();
// initialize the storage location: savepoints use externalSavepointLocation, regular checkpoints do not
checkpointStorageLocation = props.isSavepoint() ?
checkpointStorage.initializeLocationForSavepoint(checkpointID, externalSavepointLocation) :
checkpointStorage.initializeLocationForCheckpoint(checkpointID);
}
catch (Throwable t) {
// exception handling elided
}
// create the PendingCheckpoint object that represents the checkpoint in progress
final PendingCheckpoint checkpoint = new PendingCheckpoint(
job,
checkpointID,
timestamp,
ackTasks,
props,
checkpointStorageLocation,
executor);
if (statsTracker != null) {
PendingCheckpointStats callback = statsTracker.reportPendingCheckpoint(
checkpointID,
timestamp,
props);
checkpoint.setStatsCallback(callback);
}
// canceller that cleans up when this checkpoint times out: if the checkpoint has not been discarded yet, it aborts it as expired, removes it from pendingCheckpoints, remembers its ID, and calls triggerQueuedRequests (which, per the original note, starts a queued trigger depending on triggerRequestQueued)
final Runnable canceller = () -> {
synchronized (lock) {
if (!checkpoint.isDiscarded()) {
checkpoint.abortExpired();
pendingCheckpoints.remove(checkpointID);
rememberRecentCheckpointId(checkpointID);
triggerQueuedRequests();
}
}
};
// re-run the "may we start a checkpoint" checks from above to guard against race conditions
try {
synchronized (lock) {
// second round of checks; same code as above, elided here
// if the checks pass, register the checkpoint in pendingCheckpoints
pendingCheckpoints.put(checkpointID, checkpoint);
// schedule the timeout canceller
ScheduledFuture<?> cancellerHandle = timer.schedule(
canceller,
checkpointTimeout, TimeUnit.MILLISECONDS);
if (!checkpoint.setCancellerHandle(cancellerHandle)) {
cancellerHandle.cancel(false);
}
// trigger the master hooks for the checkpoint
final List<MasterState> masterStates = MasterHooks.triggerMasterHooks(masterHooks.values(),
checkpointID, timestamp, executor, Time.milliseconds(checkpointTimeout));
for (MasterState s : masterStates) {
checkpoint.addMasterState(s);
}
}
// end of lock scope
final CheckpointOptions checkpointOptions = new CheckpointOptions(
props.getCheckpointType(),
checkpointStorageLocation.getLocationReference());
// send the message that actually triggers the checkpoint on each task
for (Execution execution: executions) {
execution.triggerCheckpoint(checkpointID, timestamp, checkpointOptions);
}
numUnsuccessfulCheckpointsTriggers.set(0);
return new CheckpointTriggerResult(checkpoint);
}
catch (Throwable t) {
// exception handling elided
}
} // end trigger lock
}
恢复检查点(待补充)
// Presumably restores state from the latest checkpoint (inferred from the name only);
// the body is not shown and the article marks this section as "to be completed".
public boolean restoreLatestCheckpointedState(
Map<JobVertexID, ExecutionJobVertex> tasks,
boolean errorIfNoCheckpoint,
boolean allowNonRestoredState) throws Exception {
// body elided; per the original note, recovery can be based on ZooKeeper
}
消息驱动
Flink1.9使用的是RPC来实现JobMaster和TaskExecutor之间的通信。
// Job status listener that switches the checkpoint scheduler on while the job is RUNNING
// and off for every other job status.
public class CheckpointCoordinatorDeActivator implements JobStatusListener {

    private final CheckpointCoordinator coordinator;

    public CheckpointCoordinatorDeActivator(CheckpointCoordinator coordinator) {
        this.coordinator = checkNotNull(coordinator);
    }

    @Override
    public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
        if (newJobStatus != JobStatus.RUNNING) {
            // any status other than RUNNING: stop the checkpoint scheduler
            coordinator.stopCheckpointScheduler();
            return;
        }
        // the job is RUNNING: start the checkpoint scheduler
        coordinator.startCheckpointScheduler();
    }
}
CheckpointCoordinatorDeActivator 实现了JobStatusListener,来监听任务状态变化,当JobStatus为RUNNING时,会启动检查点调度任务,否则会停止检查点调度任务。下面我们来看下检查点在通信过程中涉及到哪些消息。
AbstractCheckpointMessage是检查点消息的基础类:
message属性 | 描述 |
---|---|
job | JobID的实例,表示当前这条消息实例的归属 |
taskExecutionId | ExecutionAttemptID的实例,表示检查点的源/目的任务 |
checkpointId | 检查点ID |
每个消息都是在CheckpointCoordinator和Task之间传递,触发不同的事件,下面描述了每个消息的发送方向及作用:
消息 | 发送方向 | 描述 |
---|---|---|
TriggerCheckpoint | JobManager->TaskManager | 告诉一个task触发其检查点 |
DeclineCheckpoint | TaskManager->JobManager | 告诉检查点协调器,检查点的请求还没有能够被处理,这种情况通常发生于:某task已处于RUNNING状态,但在内部可能还没有准备好执行检查点。 |
AcknowledgeCheckpoint | TaskManager->JobManager | 应答信号,表示某个独立的task的检查点已经完成 |
NotifyCheckpointComplete | JobManager->TaskManager | 告诉一个task它的检查点已经得到完成确认,task可以向第三方提交该检查点 |
TriggerCheckpoint
TriggerCheckpoint是用来描述触发检查点的消息。在上面分析CheckpointCoordinator的triggerCheckpoint方法的时候,其中方法末尾处有一段代码如下,作用是发送消息给task真正触发检查点。
//CheckpointCoordinator
// excerpt from the end of CheckpointCoordinator.triggerCheckpoint: ask every collected execution to trigger the checkpoint
for (Execution execution: executions) {
execution.triggerCheckpoint(checkpointID, timestamp, checkpointOptions);
}
//Execution
// Triggers a checkpoint on this execution by delegating to triggerCheckpointHelper
// with advanceToEndOfEventTime = false.
public void triggerCheckpoint(long checkpointId, long timestamp, CheckpointOptions checkpointOptions) {
triggerCheckpointHelper(checkpointId, timestamp, checkpointOptions, false);
}
// Forwards the checkpoint trigger to the TaskManager gateway of the slot this execution is assigned to.
private void triggerCheckpointHelper(long checkpointId, long timestamp, CheckpointOptions checkpointOptions, boolean advanceToEndOfEventTime) {
// read the checkpoint type
final CheckpointType checkpointType = checkpointOptions.getCheckpointType();
if (advanceToEndOfEventTime && !(checkpointType.isSynchronous() && checkpointType.isSavepoint())) {
// advanceToEndOfEventTime was requested for something that is not a synchronous savepoint: throw (statement elided)
}
final LogicalSlot slot = assignedResource;
if (slot != null) {
final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
taskManagerGateway.triggerCheckpoint(attemptId, getVertex().getJobId(), checkpointId, timestamp, checkpointOptions, advanceToEndOfEventTime);
} else {
// no slot assigned; handling elided in this excerpt
}
}
//RpcTaskManagerGateway
// Forwards the trigger request to the TaskExecutor gateway.
// Note: jobId is accepted but not passed on in the call below.
public void triggerCheckpoint(ExecutionAttemptID executionAttemptID, JobID jobId, long checkpointId, long timestamp, CheckpointOptions checkpointOptions, boolean advanceToEndOfEventTime) {
// send the message to the TaskExecutor via RPC
taskExecutorGateway.triggerCheckpoint(
executionAttemptID,
checkpointId,
timestamp,
checkpointOptions,
advanceToEndOfEventTime);
}
下面我们看看TaskExecutor是如何处理的
//TaskExecutor
// Handles a checkpoint trigger on the TaskExecutor: looks up the task for the given
// execution attempt and asks it to trigger its checkpoint barrier.
public CompletableFuture<Acknowledge> triggerCheckpoint(
ExecutionAttemptID executionAttemptID,
long checkpointId,
long checkpointTimestamp,
CheckpointOptions checkpointOptions,
boolean advanceToEndOfEventTime) {
final CheckpointType checkpointType = checkpointOptions.getCheckpointType();
if (advanceToEndOfEventTime && !(checkpointType.isSynchronous() && checkpointType.isSavepoint())) {
// same guard as in Execution: advanceToEndOfEventTime without a synchronous savepoint throws (statement elided)
}
// look up the task that belongs to executionAttemptID
final Task task = taskSlotTable.getTask(executionAttemptID);
if (task != null) {
task.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, checkpointOptions, advanceToEndOfEventTime);
return CompletableFuture.completedFuture(Acknowledge.get());
} else {
// unknown task: return an exceptional result (statement elided)
}
}
当TaskManager收到TriggerCheckpoint消息时,会向SourceStreamTask发送Barrier,并保存自身的状态快照,在后面专门分析Barrier的时候再详细讨论。
DeclineCheckpoint
DeclineCheckpoint是TaskExecutor通知JobMaster,检查点的请求还没有能够被处理。JobMaster会从pendingCheckpoints中删除该检查点,并释放该检查点的资源;如果此时没有比它更新的未完成检查点,则触发之前排队的检查点请求。
//Task
// Task-side handling of a checkpoint trigger; this excerpt highlights the two paths that
// send a DeclineCheckpoint message.
// Note: checkpointMetaData is not declared in this excerpt (its declaration was elided).
public void triggerCheckpointBarrier(
final long checkpointID,
long checkpointTimestamp,
final CheckpointOptions checkpointOptions) {
if (executionState == ExecutionState.RUNNING && invokable != null) {
// build a local closure
Runnable runnable = new Runnable() {
@Override
public void run() {
FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(safetyNetCloseableRegistry);
try {
boolean success = invokable.triggerCheckpoint(checkpointMetaData, checkpointOptions);
if (!success) {
// case 1: triggerCheckpoint reported failure, so send a DeclineCheckpoint message
checkpointResponder.declineCheckpoint(
getJobID(), getExecutionId(), checkpointID,
new CheckpointDeclineTaskNotReadyException(taskName));
}
}
catch (Throwable t) {
// exception handling elided
} finally {
FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(null);
}
}
};
// code elided in the original excerpt (presumably where the runnable is submitted for execution)
}
else {
// case 2: the task is not in RUNNING state (or has no invokable), so send a DeclineCheckpoint message
checkpointResponder.declineCheckpoint(jobId, executionId, checkpointID,
new CheckpointDeclineTaskNotReadyException(taskNameWithSubtask));
}
}
//RpcCheckpointResponder
// Wraps the arguments in a DeclineCheckpoint message and passes it to the
// checkpoint coordinator gateway.
public void declineCheckpoint(
        JobID jobID,
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        Throwable cause) {
    final DeclineCheckpoint declineMessage =
            new DeclineCheckpoint(jobID, executionAttemptID, checkpointId, cause);
    checkpointCoordinatorGateway.declineCheckpoint(declineMessage);
}
接下来我们看看,当JobMaster收到消息是如何做处理的
//JobMaster
// Receives a DeclineCheckpoint message and hands it to the checkpoint coordinator
// asynchronously through the RPC service's executor.
public void declineCheckpoint(DeclineCheckpoint decline) {
final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
if (checkpointCoordinator != null) {
getRpcService().execute(() -> {
try {
checkpointCoordinator.receiveDeclineMessage(decline);
} catch (Exception e) {
// log the error (statement elided)
}
});
} else {
// no checkpoint coordinator available: log the error (statement elided)
}
}
1.判断是否为shutdown、jobId是否一致
2.如果检查通过,从pendingCheckpoints中删除该检查点,释放检查点
//CheckpointCoordinator
// Handles a DeclineCheckpoint message: removes the referenced checkpoint from
// pendingCheckpoints and discards it if it has not been discarded already.
// Note: `reason` is not used in the lines shown; its uses are in the elided code.
public void receiveDeclineMessage(DeclineCheckpoint message) {
...
// read the checkpoint ID from the message
final long checkpointId = message.getCheckpointId();
final String reason = (message.getReason() != null ? message.getReason().getMessage() : "");
PendingCheckpoint checkpoint;
synchronized (lock) {
...
// remove the checkpoint from pendingCheckpoints
checkpoint = pendingCheckpoints.remove(checkpointId);
if (checkpoint != null && !checkpoint.isDiscarded()) {
// discard the checkpoint
discardCheckpoint(checkpoint, message.getReason());
}
else if (checkpoint != null) {
// the removed checkpoint was already discarded: throw (statement elided)
}
else if (LOG.isDebugEnabled()) {
// debug logging (statement elided)
}
}
}
// Aborts the given pending checkpoint as declined, remembers its ID, and calls
// triggerQueuedRequests unless another non-discarded pending checkpoint with an
// equal or higher ID still exists.
// Note: `reason` is not used in the lines shown; its uses are in the elided code.
private void discardCheckpoint(PendingCheckpoint pendingCheckpoint, @Nullable Throwable cause) {
//...
final long checkpointId = pendingCheckpoint.getCheckpointId();
final String reason = (cause != null) ? cause.getMessage() : "";
pendingCheckpoint.abortDeclined();
rememberRecentCheckpointId(checkpointId);
boolean haveMoreRecentPending = false;
for (PendingCheckpoint p : pendingCheckpoints.values()) {
// a pending checkpoint exists that is not discarded and whose ID is greater than or equal to pendingCheckpoint's ID
if (!p.isDiscarded() && p.getCheckpointId() >= pendingCheckpoint.getCheckpointId()) {
haveMoreRecentPending = true;
break;
}
}
if (!haveMoreRecentPending) {
// trigger the queued checkpoint requests
triggerQueuedRequests();
}
}
// Records a checkpoint ID in the bounded recentPendingCheckpoints queue.
private void rememberRecentCheckpointId(long id) {
    // once the queue has reached NUM_GHOST_CHECKPOINT_IDS entries (16 by default per the
    // original note; the constant is not shown here), drop the oldest entry first
    final boolean atCapacity = recentPendingCheckpoints.size() >= NUM_GHOST_CHECKPOINT_IDS;
    if (atCapacity) {
        recentPendingCheckpoints.removeFirst();
    }
    // append the new ID at the tail
    recentPendingCheckpoints.addLast(id);
}
AcknowledgeCheckpoint
AcknowledgeCheckpoint是由TaskExecutor发给JobMaster的消息,表示TaskExecutor已经完成了当次Checkpoint并将状态的句柄交给JobMaster保存起来。
上面我们提到,当TaskExecutor收到TriggerCheckpoint消息时,会执行triggerCheckpointBarrier方法。
//Task
// Same Task method as shown earlier in the article, abridged here to show the call into
// invokable.triggerCheckpoint that the acknowledge path starts from.
public void triggerCheckpointBarrier(
final long checkpointID,
long checkpointTimestamp,
final CheckpointOptions checkpointOptions) {
// snapshot the invokable field into a local and bundle checkpoint ID and timestamp
final AbstractInvokable invokable = this.invokable;
final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointID, checkpointTimestamp);
if (executionState == ExecutionState.RUNNING && invokable != null) {
//...
Runnable runnable = new Runnable() {
@Override
public void run() {
//...
try {
// perform the checkpoint on the invokable; `success` is consumed by elided code
boolean success = invokable.triggerCheckpoint(checkpointMetaData, checkpointOptions);
...
}
catch (Throwable t) {
//...
} finally {
//...
}
}
};
//...
}
else {
//...
}
}
如果Task是Running状态,则向每个operator中发送Barrier。否则发送CancelCheckpointMarker。
//StreamTask
// Entry point for a triggered checkpoint on the stream task: builds checkpoint metrics
// with zeroed alignment values and delegates to performCheckpoint.
public boolean triggerCheckpoint(CheckpointMetaData checkpointMetaData, CheckpointOptions checkpointOptions) throws Exception {
try {
// both alignment metrics are initialized to 0
CheckpointMetrics checkpointMetrics = new CheckpointMetrics()
.setBytesBufferedInAlignment(0L)
.setAlignmentDurationNanos(0L);
return performCheckpoint(checkpointMetaData, checkpointOptions, checkpointMetrics);
}
catch (Exception e) {
// exception handling elided
}
}
// Performs the checkpoint under `lock`. When the task is running it prepares the operators,
// broadcasts the checkpoint barrier, snapshots state and returns true; otherwise it
// broadcasts a CancelCheckpointMarker on every record writer and returns false.
private boolean performCheckpoint(
CheckpointMetaData checkpointMetaData,
CheckpointOptions checkpointOptions,
CheckpointMetrics checkpointMetrics) throws Exception {
synchronized (lock) {
if (isRunning) {
// preparation step before the barrier is emitted
operatorChain.prepareSnapshotPreBarrier(checkpointMetaData.getCheckpointId());
// broadcast the checkpoint barrier through the operator chain
operatorChain.broadcastCheckpointBarrier(
checkpointMetaData.getCheckpointId(),
checkpointMetaData.getTimestamp(),
checkpointOptions);
checkpointState(checkpointMetaData, checkpointOptions, checkpointMetrics);
return true;
}
else {// isRunning is false
final CancelCheckpointMarker message = new CancelCheckpointMarker(checkpointMetaData.getCheckpointId());
Exception exception = null;
for (StreamRecordWriter<StreamRecord<?>> streamRecordWriter : streamRecordWriters) {
try {
// emit the CancelCheckpointMarker to signal that this checkpoint is cancelled
streamRecordWriter.broadcastEvent(message);
} catch (Exception e) {
//...
}
}
//...
return false;
}
}
}
// Resolves the storage location for this checkpoint and runs a CheckpointingOperation against it.
private void checkpointState(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        CheckpointMetrics checkpointMetrics) throws Exception {
    // set up the external storage for this checkpoint ID and target location
    final CheckpointStreamFactory streamFactory = checkpointStorage.resolveCheckpointStorageLocation(
            checkpointMetaData.getCheckpointId(),
            checkpointOptions.getTargetLocation());
    // build the operation and execute it right away
    new CheckpointingOperation(
            this,
            checkpointMetaData,
            checkpointOptions,
            streamFactory,
            checkpointMetrics).executeCheckpointing();
}
// Runs checkpointStreamOperator for every operator, then submits an AsyncCheckpointRunnable
// to the owner's async operations thread pool.
// Note: startAsyncPartNano is assigned in code elided from this excerpt.
public void executeCheckpointing() throws Exception {
startSyncPartNano = System.nanoTime();
try {
for (StreamOperator<?> op : allOperators) {
// per the original note: runs the operator's snapshotState and stores the operator ID and futures in operatorSnapshotsInProgress
checkpointStreamOperator(op);
}
//...
// create the AsyncCheckpointRunnable that finishes the checkpoint asynchronously
AsyncCheckpointRunnable asyncCheckpointRunnable = new AsyncCheckpointRunnable(
owner,
operatorSnapshotsInProgress,
checkpointMetaData,
checkpointMetrics,
startAsyncPartNano);
// register it with the owner's cancelables, then submit it to the thread pool
owner.cancelables.registerCloseable(asyncCheckpointRunnable);
owner.asyncOperationsThreadPool.submit(asyncCheckpointRunnable);
//...
} catch (Exception ex) {
// exception handling elided
}
}
下面我们看看AsyncCheckpointRunnable的run方法
//AsyncCheckpointRunnable
// Finalizes the snapshot of every operator, collects the JobManager-owned and task-local
// state, and reports them if the async state can be moved from RUNNING to COMPLETED.
// Note: asyncDurationMillis is computed in code elided from this excerpt.
public void run() {
FileSystemSafetyNet.initializeSafetyNetForThread();
try {
TaskStateSnapshot jobManagerTaskOperatorSubtaskStates =
new TaskStateSnapshot(operatorSnapshotsInProgress.size());
TaskStateSnapshot localTaskOperatorSubtaskStates =
new TaskStateSnapshot(operatorSnapshotsInProgress.size());
// iterate over the operators whose snapshots are in progress
for (Map.Entry<OperatorID, OperatorSnapshotFutures> entry : operatorSnapshotsInProgress.entrySet()) {
OperatorID operatorID = entry.getKey();
OperatorSnapshotFutures snapshotInProgress = entry.getValue();
OperatorSnapshotFinalizer finalizedSnapshots =
new OperatorSnapshotFinalizer(snapshotInProgress);
// state that is sent to the JobMaster
jobManagerTaskOperatorSubtaskStates.putSubtaskStateByOperatorID(
operatorID,
finalizedSnapshots.getJobManagerOwnedState());
// state that is kept locally on the task
localTaskOperatorSubtaskStates.putSubtaskStateByOperatorID(
operatorID,
finalizedSnapshots.getTaskLocalState());
}
//...
// report only if this runnable wins the RUNNING -> COMPLETED transition
if (asyncCheckpointState.compareAndSet(CheckpointingOperation.AsyncCheckpointState.RUNNING,
CheckpointingOperation.AsyncCheckpointState.COMPLETED)) {
reportCompletedSnapshotStates(
jobManagerTaskOperatorSubtaskStates,
localTaskOperatorSubtaskStates,
asyncDurationMillis);
} else {
//...
}
} catch (Exception e) {
handleExecutionException(e);
} finally {
// always unregister from the owner's cancelables and close the thread's safety net
owner.cancelables.unregisterCloseable(this);
FileSystemSafetyNet.closeSafetyNetAndGuardedResourcesForThread();
}
}
// Passes the finished snapshots on to the task state manager.
// Note: hasAckState and hasLocalState are computed in code elided from this excerpt.
private void reportCompletedSnapshotStates(
TaskStateSnapshot acknowledgedTaskStateSnapshot,
TaskStateSnapshot localTaskStateSnapshot,
long asyncDurationMillis) {
//...
// a snapshot is replaced by null when its corresponding has*State flag is false
taskStateManager.reportTaskStateSnapshots(
checkpointMetaData,
checkpointMetrics,
hasAckState ? acknowledgedTaskStateSnapshot : null,
hasLocalState ? localTaskStateSnapshot : null);
//...
}
//TaskStateManagerImpl
// Stores the local state for this checkpoint and acknowledges the checkpoint to the
// checkpoint responder together with the acknowledged state.
public void reportTaskStateSnapshots(
@Nonnull CheckpointMetaData checkpointMetaData,
@Nonnull CheckpointMetrics checkpointMetrics,
@Nullable TaskStateSnapshot acknowledgedState,
@Nullable TaskStateSnapshot localState) {
long checkpointId = checkpointMetaData.getCheckpointId();
// hand localState to the local state store (per the original note, it is kept in storedTaskStateByCheckpointID)
localStateStore.storeLocalState(checkpointId, localState);
// send the AcknowledgeCheckpoint message, including acknowledgedState, i.e. the task's state
checkpointResponder.acknowledgeCheckpoint(
jobId,
executionAttemptID,
checkpointId,
checkpointMetrics,
acknowledgedState);
}
//RpcCheckpointResponder
// Forwards the acknowledgement unchanged to the checkpoint coordinator gateway.
public void acknowledgeCheckpoint(
JobID jobID,
ExecutionAttemptID executionAttemptID,
long checkpointId,
CheckpointMetrics checkpointMetrics,
TaskStateSnapshot subtaskState) {
checkpointCoordinatorGateway.acknowledgeCheckpoint(
jobID,
executionAttemptID,
checkpointId,
checkpointMetrics,
subtaskState);
}
当JobMaster收到AcknowledgeCheckpoint消息时,会触发如下操作:
1.将PendingCheckpoint转换为CompletedCheckpoint
2.发送NotifyCheckpointComplete消息,触发状态跟踪器的onCompletedCheckpoint回调方法
//JobMaster
// Wraps the arguments in an AcknowledgeCheckpoint message and hands it to the checkpoint
// coordinator asynchronously through the RPC service's executor.
public void acknowledgeCheckpoint(
final JobID jobID,
final ExecutionAttemptID executionAttemptID,
final long checkpointId,
final CheckpointMetrics checkpointMetrics,
final TaskStateSnapshot checkpointState) {
final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
final AcknowledgeCheckpoint ackMessage = new AcknowledgeCheckpoint(
jobID,
executionAttemptID,
checkpointId,
checkpointMetrics,
checkpointState);
if (checkpointCoordinator != null) {
getRpcService().execute(() -> {
try {
checkpointCoordinator.receiveAcknowledgeMessage(ackMessage);
} catch (Throwable t) {
// log the error (statement elided)
}
});
} else {
// no checkpoint coordinator available: log the error (statement elided)
}
}
1.判断是否为shutdown、jobId是否一致
2.如果检查通过,从notYetAcknowledgedTasks中删除该ExecutionVertex,acknowledgedTasks中加入该executionAttemptId,并从operatorSubtaskStates中获取每个operator的状态句柄保存到operatorState中。
3.如果2返回成功,向sharedStateRegistry中注册状态句柄
4.将pendingCheckpoint变成completedCheckpoint,并加入completedCheckpointStore中,从pendingCheckpoints中删除该检查点,触发新一次检查点,最后调用notifyCheckpointComplete方法通知task可以向第三方提交检查点。
//CheckpointCoordinator
// Handles an AcknowledgeCheckpoint message: looks up the pending checkpoint by ID, records
// the task's acknowledgement on it, and completes the checkpoint once every task has
// acknowledged. Lines consisting of "..." mark code elided by the article's author.
public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message) throws CheckpointException {
    ...
    final long checkpointId = message.getCheckpointId();
    synchronized (lock) {
        ...
        final PendingCheckpoint checkpoint = pendingCheckpoints.get(checkpointId);
        if (checkpoint != null && !checkpoint.isDiscarded()) {
            // record this task's acknowledgement on the pending checkpoint and branch on the outcome
            switch (checkpoint.acknowledgeTask(message.getTaskExecutionId(), message.getSubtaskState(), message.getCheckpointMetrics())) {
                case SUCCESS:
                    // restored head of the logging call; the excerpt only kept its argument list
                    LOG.debug("Received acknowledge message for checkpoint {} from task {} of job {}.",
                        checkpointId, message.getTaskExecutionId(), message.getJob());
                    if (checkpoint.isFullyAcknowledged()) {
                        // every task has acknowledged: finish this checkpoint
                        completePendingCheckpoint(checkpoint);
                    }
                    break;
                ...
            }
            return true;
        }
        else if (checkpoint != null) {
            // the pending checkpoint was already discarded (handling elided)
            ...
        }
        else {
            // no pending checkpoint with this ID (handling elided)
            ...
        }
    }
}
// Turns a fully acknowledged PendingCheckpoint into a CompletedCheckpoint, stores it
// (unless it is a savepoint), removes it from pendingCheckpoints, and notifies the tasks
// in tasksToCommitTo that the checkpoint is complete.
private void completePendingCheckpoint(PendingCheckpoint pendingCheckpoint) throws CheckpointException {
final long checkpointId = pendingCheckpoint.getCheckpointId();
final CompletedCheckpoint completedCheckpoint;
Map<OperatorID, OperatorState> operatorStates = pendingCheckpoint.getOperatorStates();
// register the operator states with the sharedStateRegistry
sharedStateRegistry.registerAll(operatorStates.values());
try {
try {
// convert the pending checkpoint into a CompletedCheckpoint
completedCheckpoint = pendingCheckpoint.finalizeCheckpoint();
}
catch (Exception e1) {
//...
}
if (!completedCheckpoint.getProperties().isSavepoint()) {
try {
// add it to the completedCheckpointStore
completedCheckpointStore.addCheckpoint(completedCheckpoint);
} catch (Exception exception) {
...
}
// drop pending checkpoints subsumed by this one; the argument is the checkpoint ID
// (presumably this discards and removes older pending checkpoints; the helper is not shown)
dropSubsumedCheckpoints(checkpointId);
}
} finally {
// always remove the checkpoint from pendingCheckpoints and trigger any queued requests
pendingCheckpoints.remove(checkpointId);
triggerQueuedRequests();
}
rememberRecentCheckpointId(checkpointId);
// remember the completion time, used for the minimum-pause check
lastCheckpointCompletionNanos = System.nanoTime();
final long timestamp = completedCheckpoint.getTimestamp();
for (ExecutionVertex ev : tasksToCommitTo) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
// send the NotifyCheckpointComplete message to the task
ee.notifyCheckpointComplete(checkpointId, timestamp);
}
}
}
//PendingCheckpoint
// Records the acknowledgement of one task for this pending checkpoint and merges the
// task's per-operator subtask state into operatorStates.
// Returns DISCARDED if the checkpoint was discarded, DUPLICATE if the attempt already
// acknowledged, UNKNOWN if the attempt is not expected, and SUCCESS otherwise.
// Note: ackTimestamp and stateSize are consumed by code elided near the end.
public TaskAcknowledgeResult acknowledgeTask(
ExecutionAttemptID executionAttemptId,
TaskStateSnapshot operatorSubtaskStates,
CheckpointMetrics metrics) {
synchronized (lock) {
if (discarded) {
return TaskAcknowledgeResult.DISCARDED;
}
// remove the attempt from notYetAcknowledgedTasks
final ExecutionVertex vertex = notYetAcknowledgedTasks.remove(executionAttemptId);
if (vertex == null) {
if (acknowledgedTasks.contains(executionAttemptId)) {
return TaskAcknowledgeResult.DUPLICATE;
} else {
return TaskAcknowledgeResult.UNKNOWN;
}
} else {
// add executionAttemptId to acknowledgedTasks
acknowledgedTasks.add(executionAttemptId);
}
List<OperatorID> operatorIDs = vertex.getJobVertex().getOperatorIDs();
int subtaskIndex = vertex.getParallelSubtaskIndex();
long ackTimestamp = System.currentTimeMillis();
long stateSize = 0L;
if (operatorSubtaskStates != null) {
for (OperatorID operatorID : operatorIDs) {
OperatorSubtaskState operatorSubtaskState =
operatorSubtaskStates.getSubtaskStateByOperatorID(operatorID);
// an operator without reported state gets an empty OperatorSubtaskState
if (operatorSubtaskState == null) {
operatorSubtaskState = new OperatorSubtaskState();
}
OperatorState operatorState = operatorStates.get(operatorID);
if (operatorState == null) {
// create the OperatorState on first use
operatorState = new OperatorState(
operatorID,
vertex.getTotalNumberOfParallelSubtasks(),
vertex.getMaxParallelism());
// insert it into operatorStates
operatorStates.put(operatorID, operatorState);
}
operatorState.putState(subtaskIndex, operatorSubtaskState);
stateSize += operatorSubtaskState.getStateSize();
}
}
++numAcknowledgedTasks;
//...
return TaskAcknowledgeResult.SUCCESS;
}
}
NotifyCheckpointComplete
在上面的receiveAcknowledgeMessage方法中,当所有task都确认(ack)了检查点之后,会将PendingCheckpoint转换为CompletedCheckpoint,最后会调用notifyCheckpointComplete发送NotifyCheckpointComplete消息
//CheckpointCoordinator
// Abridged repeat of completePendingCheckpoint showing only the notification loop.
private void completePendingCheckpoint(PendingCheckpoint pendingCheckpoint) throws CheckpointException {
...
// notify the current execution attempt of every task in tasksToCommitTo
for (ExecutionVertex ev : tasksToCommitTo) {
Execution ee = ev.getCurrentExecutionAttempt();
if (ee != null) {
ee.notifyCheckpointComplete(checkpointId, timestamp);
}
}
}
//Execution
// Tells the TaskManager that hosts this execution that the given checkpoint is complete.
public void notifyCheckpointComplete(long checkpointId, long timestamp) {
    final LogicalSlot slot = assignedResource;
    if (slot == null) {
        // no slot assigned: the original only logs here (statement elided in the excerpt)
        return;
    }
    slot.getTaskManagerGateway()
        .notifyCheckpointComplete(attemptId, getVertex().getJobId(), checkpointId, timestamp);
}
//RpcTaskManagerGateway
// Forwards the notification to the TaskExecutor gateway as confirmCheckpoint.
// Note: jobId is accepted but not passed on in the call below.
public void notifyCheckpointComplete(ExecutionAttemptID executionAttemptID, JobID jobId, long checkpointId, long timestamp) {
taskExecutorGateway.confirmCheckpoint(executionAttemptID, checkpointId, timestamp);
}
TaskExecutor收到该消息后会作如下处理:
//TaskExecutor
// Handles a checkpoint confirmation on the TaskExecutor: looks up the task for the given
// execution attempt and notifies it; an unknown task yields an exceptionally completed future.
public CompletableFuture<Acknowledge> confirmCheckpoint(
ExecutionAttemptID executionAttemptID,
long checkpointId,
long checkpointTimestamp) {
// look up the task in taskSlotTable
final Task task = taskSlotTable.getTask(executionAttemptID);
if (task != null) {
task.notifyCheckpointComplete(checkpointId);
return CompletableFuture.completedFuture(Acknowledge.get());
} else {
// code elided in the original excerpt; `message` used below is defined there
return FutureUtils.completedExceptionally(new CheckpointException(message));
}
}
//Task
// Notifies the invokable and the task state manager that a checkpoint is complete.
// The work runs asynchronously via executeAsyncCallRunnable, and only when the task is
// RUNNING and has an invokable.
public void notifyCheckpointComplete(final long checkpointID) {
// snapshot the invokable field into a local for use inside the closure
final AbstractInvokable invokable = this.invokable;
if (executionState == ExecutionState.RUNNING && invokable != null) {
Runnable runnable = new Runnable() {
@Override
public void run() {
try {
invokable.notifyCheckpointComplete(checkpointID);
taskStateManager.notifyCheckpointComplete(checkpointID);
} catch (Throwable t) {
if (getExecutionState() == ExecutionState.RUNNING) {
// failure while the task is still RUNNING: error handling elided
}
}
}
};
executeAsyncCallRunnable(
runnable,
"Checkpoint Confirmation for " + taskNameWithSubtask,
false);
}
else {
// task not running: only logged (statement elided)
}
}
最后触发Task的notifyCheckpointComplete方法。至此,整个检查点才算完成,在notifyCheckpointComplete可以实现task向第三方提交检查点等操作。