use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class Task method triggerCheckpointBarrier.
// ------------------------------------------------------------------------
// Notifications on the invokable
// ------------------------------------------------------------------------
/**
* Calls the invokable to trigger a checkpoint.
*
* @param checkpointID The ID identifying the checkpoint.
* @param checkpointTimestamp The timestamp associated with the checkpoint.
* @param checkpointOptions Options for performing this checkpoint.
*/
public void triggerCheckpointBarrier(final long checkpointID, final long checkpointTimestamp, final CheckpointOptions checkpointOptions) {
final TaskInvokable invokable = this.invokable;
final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointID, checkpointTimestamp, System.currentTimeMillis());
if (executionState == ExecutionState.RUNNING) {
checkState(invokable instanceof CheckpointableTask, "invokable is not checkpointable");
try {
((CheckpointableTask) invokable).triggerCheckpointAsync(checkpointMetaData, checkpointOptions).handle((triggerResult, exception) -> {
if (exception != null || !triggerResult) {
declineCheckpoint(checkpointID, CheckpointFailureReason.TASK_FAILURE, exception);
return false;
}
return true;
});
} catch (RejectedExecutionException ex) {
// This may happen if the mailbox is closed. It means that the task is shutting
// down, so we just ignore it.
LOG.debug("Triggering checkpoint {} for {} ({}) was rejected by the mailbox", checkpointID, taskNameWithSubtask, executionId);
declineCheckpoint(checkpointID, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_CLOSING);
} catch (Throwable t) {
if (getExecutionState() == ExecutionState.RUNNING) {
failExternally(new Exception("Error while triggering checkpoint " + checkpointID + " for " + taskNameWithSubtask, t));
} else {
LOG.debug("Encountered error while triggering checkpoint {} for " + "{} ({}) while being not in state running.", checkpointID, taskNameWithSubtask, executionId, t);
}
}
} else {
LOG.debug("Declining checkpoint request for non-running task {} ({}).", taskNameWithSubtask, executionId);
// send back a message that we did not do the checkpoint
declineCheckpoint(checkpointID, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY);
}
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class MultipleInputStreamTask method triggerStopWithSavepointAsync.
private CompletableFuture<Boolean> triggerStopWithSavepointAsync(CheckpointMetaData checkpointMetaData, CheckpointOptions checkpointOptions) {
CompletableFuture<Void> sourcesStopped = new CompletableFuture<>();
final StopMode stopMode = ((SavepointType) checkpointOptions.getCheckpointType()).shouldDrain() ? StopMode.DRAIN : StopMode.NO_DRAIN;
mainMailboxExecutor.execute(() -> {
setSynchronousSavepoint(checkpointMetaData.getCheckpointId());
FutureUtils.forward(FutureUtils.waitForAll(operatorChain.getSourceTaskInputs().stream().map(s -> s.getOperator().stop(stopMode)).collect(Collectors.toList())), sourcesStopped);
}, "stop chained Flip-27 source for stop-with-savepoint --drain");
return sourcesStopped.thenCompose(ignore -> triggerSourcesCheckpointAsync(checkpointMetaData, checkpointOptions));
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class SourceStreamTask method init.
@Override
protected void init() {
// we check if the source is actually inducing the checkpoints, rather
// than the trigger
SourceFunction<?> source = mainOperator.getUserFunction();
if (source instanceof ExternallyInducedSource) {
externallyInducedCheckpoints = true;
ExternallyInducedSource.CheckpointTrigger triggerHook = new ExternallyInducedSource.CheckpointTrigger() {
@Override
public void triggerCheckpoint(long checkpointId) throws FlinkException {
// TODO - we need to see how to derive those. We should probably not
// encode this in the
// TODO - source's trigger message, but do a handshake in this task
// between the trigger
// TODO - message from the master, and the source's trigger
// notification
final CheckpointOptions checkpointOptions = CheckpointOptions.forConfig(CheckpointType.CHECKPOINT, CheckpointStorageLocationReference.getDefault(), configuration.isExactlyOnceCheckpointMode(), configuration.isUnalignedCheckpointsEnabled(), configuration.getAlignedCheckpointTimeout().toMillis());
final long timestamp = System.currentTimeMillis();
final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, timestamp, timestamp);
try {
SourceStreamTask.super.triggerCheckpointAsync(checkpointMetaData, checkpointOptions).get();
} catch (RuntimeException e) {
throw e;
} catch (Exception e) {
throw new FlinkException(e.getMessage(), e);
}
}
};
((ExternallyInducedSource<?, ?>) source).setCheckpointTrigger(triggerHook);
}
getEnvironment().getMetricGroup().getIOMetricGroup().gauge(MetricNames.CHECKPOINT_START_DELAY_TIME, this::getAsyncCheckpointStartDelayNanos);
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.
/**
* FLINK-5667
*
* <p>Tests that a concurrent cancel operation does not discard the state handles of an
* acknowledged checkpoint. The situation can only happen if the cancel call is executed after
* Environment.acknowledgeCheckpoint() and before the CloseableRegistry.unregisterClosable()
* call.
*/
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
final OneShotLatch completeAcknowledge = new OneShotLatch();
CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
acknowledgeCheckpointLatch.trigger();
// block here so that we can issue the concurrent cancel call
while (true) {
try {
// wait until we successfully await (no pun intended)
completeAcknowledge.await();
// when await() returns normally, we break out of the loop
break;
} catch (InterruptedException e) {
// survive interruptions that arise from thread pool
// shutdown
// production code cannot actually throw
// InterruptedException from
// checkpoint acknowledgement
}
}
return null;
}
}).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), any(TaskStateSnapshot.class));
TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
KeyedStateHandle managedKeyedStateHandle = mock(KeyedStateHandle.class);
KeyedStateHandle rawKeyedStateHandle = mock(KeyedStateHandle.class);
OperatorStateHandle managedOperatorStateHandle = mock(OperatorStreamStateHandle.class);
OperatorStateHandle rawOperatorStateHandle = mock(OperatorStreamStateHandle.class);
OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(rawKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskName("mock-task").setTaskStateManager(taskStateManager).build()) {
RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult))));
MockStreamTask streamTask = task.streamTask;
waitTaskIsRunning(streamTask, task.invocationFuture);
final long checkpointId = 42L;
streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
acknowledgeCheckpointLatch.await();
ArgumentCaptor<TaskStateSnapshot> subtaskStateCaptor = ArgumentCaptor.forClass(TaskStateSnapshot.class);
// check that the checkpoint has been completed
verify(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
TaskStateSnapshot subtaskStates = subtaskStateCaptor.getValue();
OperatorSubtaskState subtaskState = subtaskStates.getSubtaskStateMappings().iterator().next().getValue();
// check that the subtask state contains the expected state handles
assertEquals(singleton(managedKeyedStateHandle), subtaskState.getManagedKeyedState());
assertEquals(singleton(rawKeyedStateHandle), subtaskState.getRawKeyedState());
assertEquals(singleton(managedOperatorStateHandle), subtaskState.getManagedOperatorState());
assertEquals(singleton(rawOperatorStateHandle), subtaskState.getRawOperatorState());
// check that the state handles have not been discarded
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
streamTask.cancel();
completeAcknowledge.trigger();
// canceling the stream task after it has acknowledged the checkpoint should not discard
// the state handles
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
task.waitForTaskCompletion(true);
}
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class StreamTaskTest method testUncaughtExceptionInAsynchronousCheckpointingOperation.
/**
* Tests that uncaught exceptions in the async part of a checkpoint operation are forwarded to
* the uncaught exception handler. See <a
* href="https://issues.apache.org/jira/browse/FLINK-12889">FLINK-12889</a>.
*/
@Test
public void testUncaughtExceptionInAsynchronousCheckpointingOperation() throws Exception {
final RuntimeException failingCause = new RuntimeException("Test exception");
FailingDummyEnvironment failingDummyEnvironment = new FailingDummyEnvironment(failingCause);
// mock the returned snapshots
OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(ExceptionallyDoneFuture.of(failingCause), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
final TestingUncaughtExceptionHandler uncaughtExceptionHandler = new TestingUncaughtExceptionHandler();
RunningTask<MockStreamTask> task = runTask(() -> new MockStreamTask(failingDummyEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult)), uncaughtExceptionHandler));
MockStreamTask streamTask = task.streamTask;
waitTaskIsRunning(streamTask, task.invocationFuture);
streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
final Throwable uncaughtException = uncaughtExceptionHandler.waitForUncaughtException();
assertThat(uncaughtException, is(failingCause));
streamTask.finishInput();
task.waitForTaskCompletion(false);
}
Aggregations