use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.
the class SubtaskCheckpointCoordinatorImpl method checkpointState.
/**
 * Performs the synchronous part of a checkpoint for this subtask and, if the synchronous
 * snapshot succeeds, passes the collected snapshot futures to {@code finishAndReportAsync}.
 *
 * <p>Order of operations as implemented below:
 *
 * <ol>
 *   <li>A checkpoint whose id is not greater than {@code lastCheckpointId} is treated as out of
 *       order: the channel state writer is asked to abort it and the method returns.
 *   <li>The id is stored in {@code lastCheckpointId}. If {@code checkAndClearAbortedStatus}
 *       reports the checkpoint as aborted, a {@link CancelCheckpointMarker} is broadcast and
 *       the method returns.
 *   <li>If the alignment type is {@code FORCED_ALIGNED}, the options are replaced by {@code
 *       options.withUnalignedSupported()} and {@code initInputsCheckpoint} is called.
 *   <li>{@code prepareSnapshotPreBarrier} is invoked on the operator chain, the {@link
 *       CheckpointBarrier} is broadcast, and for unaligned checkpoints {@code finishOutput} is
 *       called on the channel state writer.
 *   <li>{@code takeSnapshotSync} is called; on {@code true} the futures go to {@code
 *       finishAndReportAsync}, on {@code false} they are passed to {@code cleanup} together
 *       with a "Checkpoint declined" exception.
 * </ol>
 *
 * @param metadata supplies the checkpoint id and timestamp used throughout this method
 * @param options checkpoint options; verified with {@code checkNotNull}
 * @param metrics metrics builder; verified with {@code checkNotNull} and forwarded to the
 *     snapshot, reporting and cleanup calls
 * @param operatorChain chain used for broadcasting events and for taking the snapshot
 * @param isTaskFinished forwarded unchanged to {@code finishAndReportAsync}
 * @param isRunning forwarded unchanged to {@code takeSnapshotSync} and {@code
 *     finishAndReportAsync}
 * @throws Exception any exception caught around the snapshot step is rethrown after {@code
 *     cleanup} has been called with it
 */
@Override
public void checkpointState(CheckpointMetaData metadata, CheckpointOptions options, CheckpointMetricsBuilder metrics, OperatorChain<?, ?> operatorChain, boolean isTaskFinished, Supplier<Boolean> isRunning) throws Exception {
checkNotNull(options);
checkNotNull(metrics);
// A checkpoint id that is not strictly newer than the last one seen is not processed again.
if (lastCheckpointId >= metadata.getCheckpointId()) {
LOG.info("Out of order checkpoint barrier (aborted previously?): {} >= {}", lastCheckpointId, metadata.getCheckpointId());
channelStateWriter.abort(metadata.getCheckpointId(), new CancellationException(), true);
// The returned flag is ignored here; presumably only the clearing of the recorded status
// matters on this path (confirm against checkAndClearAbortedStatus).
checkAndClearAbortedStatus(metadata.getCheckpointId());
return;
}
logCheckpointProcessingDelay(metadata);
// Step (0): Record the last triggered checkpointId and abort the sync phase of checkpoint
// if necessary.
lastCheckpointId = metadata.getCheckpointId();
if (checkAndClearAbortedStatus(metadata.getCheckpointId())) {
// broadcast cancel checkpoint marker to avoid downstream back-pressure due to
// checkpoint barrier align.
operatorChain.broadcastEvent(new CancelCheckpointMarker(metadata.getCheckpointId()));
LOG.info("Checkpoint {} has been notified as aborted, would not trigger any checkpoint.", metadata.getCheckpointId());
return;
}
// if checkpoint has been previously unaligned, but was forced to be aligned (pointwise
// connection), revert it here so that it can jump over output data
if (options.getAlignment() == CheckpointOptions.AlignmentType.FORCED_ALIGNED) {
options = options.withUnalignedSupported();
initInputsCheckpoint(metadata.getCheckpointId(), options);
}
// Step (1): Prepare the checkpoint, allow operators to do some pre-barrier work.
// The pre-barrier work should be nothing or minimal in the common case.
operatorChain.prepareSnapshotPreBarrier(metadata.getCheckpointId());
// Step (2): Send the checkpoint barrier downstream
operatorChain.broadcastEvent(new CheckpointBarrier(metadata.getCheckpointId(), metadata.getTimestamp(), options), options.isUnalignedCheckpoint());
// Step (3): Prepare to spill the in-flight buffers for input and output
if (options.isUnalignedCheckpoint()) {
// output data already written while broadcasting event
channelStateWriter.finishOutput(metadata.getCheckpointId());
}
// Step (4): Take the state snapshot. This should be largely asynchronous, to not impact
// progress of the streaming topology.
Map<OperatorID, OperatorSnapshotFutures> snapshotFutures = new HashMap<>(operatorChain.getNumberOfOperators());
try {
if (takeSnapshotSync(snapshotFutures, metadata, metrics, options, operatorChain, isRunning)) {
finishAndReportAsync(snapshotFutures, metadata, metrics, operatorChain.isTaskDeployedAsFinished(), isTaskFinished, isRunning);
} else {
// The synchronous snapshot returned false: hand whatever futures were collected to cleanup.
cleanup(snapshotFutures, metadata, metrics, new Exception("Checkpoint declined"));
}
} catch (Exception ex) {
// Hand the collected futures to cleanup before propagating the failure to the caller.
cleanup(snapshotFutures, metadata, metrics, ex);
throw ex;
}
}
use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.
the class AsyncCheckpointRunnable method finalizedFinishedSnapshots.
/**
 * Waits for the channel state futures of every operator snapshot in progress and then returns
 * a result in which both task state snapshots are {@code TaskStateSnapshot.FINISHED_ON_RESTORE}.
 *
 * @return a {@code SnapshotsFinalizeResult} built from {@code FINISHED_ON_RESTORE} (twice) and
 *     {@code 0L}
 * @throws Exception if waiting on one of the channel state futures fails
 */
private SnapshotsFinalizeResult finalizedFinishedSnapshots() throws Exception {
    // The channel states must be complete before we continue; otherwise the alignment of
    // barriers might not have finished yet.
    for (OperatorSnapshotFutures operatorFutures : operatorSnapshotsInProgress.values()) {
        operatorFutures.getInputChannelStateFuture().get();
        operatorFutures.getResultSubpartitionStateFuture().get();
    }

    final TaskStateSnapshot finishedMarker = TaskStateSnapshot.FINISHED_ON_RESTORE;
    return new SnapshotsFinalizeResult(finishedMarker, finishedMarker, 0L);
}
use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.
the class RegularOperatorChain method buildOperatorSnapshotFutures.
/**
 * Creates the snapshot futures for a single operator.
 *
 * <p>The futures are obtained from {@code checkpointStreamOperator} and then passed, together
 * with the channel state write result, to {@code snapshotChannelStates} before being returned.
 *
 * @param checkpointMetaData metadata forwarded to {@code checkpointStreamOperator}
 * @param checkpointOptions options forwarded to {@code checkpointStreamOperator}
 * @param op the operator to snapshot
 * @param isRunning supplier forwarded to {@code checkpointStreamOperator}
 * @param channelStateWriteResult channel state write result forwarded to {@code
 *     snapshotChannelStates}
 * @param storage stream factory forwarded to {@code checkpointStreamOperator}
 * @return the futures returned by {@code checkpointStreamOperator}
 * @throws Exception if one of the delegated calls fails
 */
private OperatorSnapshotFutures buildOperatorSnapshotFutures(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        StreamOperator<?> op,
        Supplier<Boolean> isRunning,
        ChannelStateWriter.ChannelStateWriteResult channelStateWriteResult,
        CheckpointStreamFactory storage)
        throws Exception {
    final OperatorSnapshotFutures operatorFutures =
            checkpointStreamOperator(op, checkpointMetaData, checkpointOptions, storage, isRunning);
    snapshotChannelStates(op, channelStateWriteResult, operatorFutures);
    return operatorFutures;
}
use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.
the class LocalStateForwardingTest method testReportingFromSnapshotToTaskStateManager.
/**
 * Checks that the jm and tm-local state carried by the futures reported by the backends is
 * forwarded through the async checkpointing thread to the {@link
 * org.apache.flink.runtime.state.TaskStateManager}.
 */
@Test
public void testReportingFromSnapshotToTaskStateManager() throws Exception {
    // --- environment and task under test ---
    final TestTaskStateManager stateManager = new TestTaskStateManager();
    final StreamMockEnvironment mockEnvironment =
            new StreamMockEnvironment(
                    new Configuration(),
                    new Configuration(),
                    new ExecutionConfig(),
                    1024 * 1024,
                    new MockInputSplitProvider(),
                    0,
                    stateManager);
    final StreamTask task = new StreamTaskTest.NoOpStreamTask(mockEnvironment);

    final CheckpointMetaData metaData = new CheckpointMetaData(0L, 0L);
    final CheckpointMetricsBuilder metricsBuilder = new CheckpointMetricsBuilder();

    // --- a single operator whose six state futures are all populated ---
    final Map<OperatorID, OperatorSnapshotFutures> futuresByOperator = new HashMap<>(1);
    final OperatorSnapshotFutures futures = new OperatorSnapshotFutures();
    futures.setKeyedStateManagedFuture(createSnapshotResult(KeyedStateHandle.class));
    futures.setKeyedStateRawFuture(createSnapshotResult(KeyedStateHandle.class));
    futures.setOperatorStateManagedFuture(createSnapshotResult(OperatorStateHandle.class));
    futures.setOperatorStateRawFuture(createSnapshotResult(OperatorStateHandle.class));
    futures.setInputChannelStateFuture(
            createSnapshotCollectionResult(InputChannelStateHandle.class));
    futures.setResultSubpartitionStateFuture(
            createSnapshotCollectionResult(ResultSubpartitionStateHandle.class));
    final OperatorID operatorId = new OperatorID();
    futuresByOperator.put(operatorId, futures);

    // --- run the async part of the checkpoint on the calling thread ---
    final AsyncCheckpointRunnable asyncRunnable =
            new AsyncCheckpointRunnable(
                    futuresByOperator,
                    metaData,
                    metricsBuilder,
                    0L,
                    task.getName(),
                    ignored -> {
                    },
                    task.getEnvironment(),
                    task,
                    false,
                    false,
                    () -> true);
    metricsBuilder.setAlignmentDurationNanos(0L);
    metricsBuilder.setBytesProcessedDuringAlignment(0L);
    asyncRunnable.run();

    // --- compare what reached the task state manager against the original futures ---
    final TaskStateSnapshot jmSnapshot = stateManager.getLastJobManagerTaskStateSnapshot();
    final TaskStateSnapshot tmSnapshot = stateManager.getLastTaskManagerTaskStateSnapshot();
    final OperatorSubtaskState jmSubtaskState = jmSnapshot.getSubtaskStateByOperatorID(operatorId);
    final OperatorSubtaskState tmSubtaskState = tmSnapshot.getSubtaskStateByOperatorID(operatorId);

    performCheck(
            futures.getKeyedStateManagedFuture(),
            jmSubtaskState.getManagedKeyedState(),
            tmSubtaskState.getManagedKeyedState());
    performCheck(
            futures.getKeyedStateRawFuture(),
            jmSubtaskState.getRawKeyedState(),
            tmSubtaskState.getRawKeyedState());
    performCheck(
            futures.getOperatorStateManagedFuture(),
            jmSubtaskState.getManagedOperatorState(),
            tmSubtaskState.getManagedOperatorState());
    performCheck(
            futures.getOperatorStateRawFuture(),
            jmSubtaskState.getRawOperatorState(),
            tmSubtaskState.getRawOperatorState());
    performCollectionCheck(
            futures.getInputChannelStateFuture(),
            jmSubtaskState.getInputChannelState(),
            tmSubtaskState.getInputChannelState());
    performCollectionCheck(
            futures.getResultSubpartitionStateFuture(),
            jmSubtaskState.getResultSubpartitionState(),
            tmSubtaskState.getResultSubpartitionState());
}
use of org.apache.flink.streaming.api.operators.OperatorSnapshotFutures in project flink by apache.
the class AsyncCheckpointRunnableTest method testDeclineAsyncCheckpoint.
/**
 * Runs an {@code AsyncCheckpointRunnable} whose snapshot futures contain one future that
 * completed exceptionally with a {@code CheckpointException}, and verifies that the failure
 * reason exposed by the test environment is the same instance as the one the exception was
 * created with.
 */
@Test
public void testDeclineAsyncCheckpoint() {
    final CheckpointFailureReason originalReason =
            CheckpointFailureReason.CHECKPOINT_DECLINED_INPUT_END_OF_STREAM;

    final Map<OperatorID, OperatorSnapshotFutures> snapshotsInProgress = new HashMap<>();
    snapshotsInProgress.put(
            new OperatorID(),
            new OperatorSnapshotFutures(
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()),
                    // The only failing future (fifth constructor argument); it carries the
                    // reason that is expected to surface in the environment.
                    ExceptionallyDoneFuture.of(new CheckpointException(originalReason)),
                    DoneFuture.of(SnapshotResult.empty())));

    final TestEnvironment environment = new TestEnvironment();
    final AsyncCheckpointRunnable runnable =
            createAsyncRunnable(snapshotsInProgress, environment, false, true);
    runnable.run();

    // assertSame takes (expected, actual); keeping that order makes a failure message label
    // the two values correctly.
    Assert.assertSame(originalReason, environment.getCause().getCheckpointFailureReason());
}
Aggregations