use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseBeforeAcknowledge.
/**
* FLINK-5667
*
* <p>Tests that a concurrent cancel operation discards the state handles of a not yet
* acknowledged checkpoint and prevents sending an acknowledge message to the
* CheckpointCoordinator. The situation can only happen if the cancel call is executed before
* Environment.acknowledgeCheckpoint().
*/
@Test
public void testAsyncCheckpointingConcurrentCloseBeforeAcknowledge() throws Exception {
final TestingKeyedStateHandle managedKeyedStateHandle = new TestingKeyedStateHandle();
final TestingKeyedStateHandle rawKeyedStateHandle = new TestingKeyedStateHandle();
final TestingOperatorStateHandle managedOperatorStateHandle = new TestingOperatorStateHandle();
final TestingOperatorStateHandle rawOperatorStateHandle = new TestingOperatorStateHandle();
final BlockingRunnableFuture<SnapshotResult<KeyedStateHandle>> rawKeyedStateHandleFuture = new BlockingRunnableFuture<>(2, SnapshotResult.of(rawKeyedStateHandle));
OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), rawKeyedStateHandleFuture, DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
final OneInputStreamOperator<String, String> streamOperator = streamOperatorWithSnapshot(operatorSnapshotResult);
final AcknowledgeDummyEnvironment mockEnvironment = new AcknowledgeDummyEnvironment();
RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperator)));
waitTaskIsRunning(task.streamTask, task.invocationFuture);
final long checkpointId = 42L;
task.streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
rawKeyedStateHandleFuture.awaitRun();
task.streamTask.cancel();
final FutureUtils.ConjunctFuture<Void> discardFuture = FutureUtils.waitForAll(asList(managedKeyedStateHandle.getDiscardFuture(), rawKeyedStateHandle.getDiscardFuture(), managedOperatorStateHandle.getDiscardFuture(), rawOperatorStateHandle.getDiscardFuture()));
// make sure that all state handles have been discarded
discardFuture.get();
try {
mockEnvironment.getAcknowledgeCheckpointFuture().get(10L, TimeUnit.MILLISECONDS);
fail("The checkpoint should not get acknowledged.");
} catch (TimeoutException expected) {
// future should not be completed
}
task.waitForTaskCompletion(true);
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class StreamTaskTest method testFailingAsyncCheckpointRunnable.
/**
* Tests that in case of a failing AsyncCheckpointRunnable all operator snapshot results are
* cancelled and all non partitioned state handles are discarded.
*/
@Test
public void testFailingAsyncCheckpointRunnable() throws Exception {
// mock the new state operator snapshots
OperatorSnapshotFutures operatorSnapshotResult1 = mock(OperatorSnapshotFutures.class);
OperatorSnapshotFutures operatorSnapshotResult2 = mock(OperatorSnapshotFutures.class);
OperatorSnapshotFutures operatorSnapshotResult3 = mock(OperatorSnapshotFutures.class);
RunnableFuture<SnapshotResult<OperatorStateHandle>> failingFuture = mock(RunnableFuture.class);
when(failingFuture.get()).thenThrow(new ExecutionException(new Exception("Test exception")));
when(operatorSnapshotResult3.getOperatorStateRawFuture()).thenReturn(failingFuture);
try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().build()) {
RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult1), streamOperatorWithSnapshot(operatorSnapshotResult2), streamOperatorWithSnapshot(operatorSnapshotResult3))));
MockStreamTask streamTask = task.streamTask;
waitTaskIsRunning(streamTask, task.invocationFuture);
mockEnvironment.setExpectedExternalFailureCause(Throwable.class);
streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation()).get();
// wait for the completion of the async task
ExecutorService executor = streamTask.getAsyncOperationsThreadPool();
executor.shutdown();
if (!executor.awaitTermination(10000L, TimeUnit.MILLISECONDS)) {
fail("Executor did not shut down within the given timeout. This indicates that the " + "checkpointing did not resume.");
}
assertTrue(mockEnvironment.getActualExternalFailureCause().isPresent());
verify(operatorSnapshotResult1).cancel();
verify(operatorSnapshotResult2).cancel();
verify(operatorSnapshotResult3).cancel();
streamTask.finishInput();
task.waitForTaskCompletion(false);
}
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class RestoreStreamTaskTest method triggerCheckpoint.
private void triggerCheckpoint(OneInputStreamTaskTestHarness<String, String> testHarness, OneInputStreamTask<String, String> streamTask) throws Exception {
long checkpointId = 1L;
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 1L);
streamTask.triggerCheckpointAsync(checkpointMetaData, CheckpointOptions.forCheckpointWithDefaultLocation());
testHarness.taskStateManager.getWaitForReportLatch().await();
long reportedCheckpointId = testHarness.taskStateManager.getReportedCheckpointId();
assertEquals(checkpointId, reportedCheckpointId);
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class StreamTaskFinalCheckpointsTest method testOperatorSkipLifeCycleIfFinishedOnRestore.
@Test
public void testOperatorSkipLifeCycleIfFinishedOnRestore() throws Exception {
try (StreamTaskMailboxTestHarness<String> harness = new StreamTaskMailboxTestHarnessBuilder<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).addInput(BasicTypeInfo.STRING_TYPE_INFO, 3).setCollectNetworkEvents().setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE).setupOperatorChain(new TestFinishedOnRestoreStreamOperator()).chain(new TestFinishedOnRestoreStreamOperator(), StringSerializer.INSTANCE).finish().build()) {
// Finish the restore, including state initialization and open.
harness.processAll();
// Try trigger a checkpoint.
harness.getTaskStateManager().getWaitForReportLatch().reset();
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(2, 2);
CheckpointOptions checkpointOptions = new CheckpointOptions(CheckpointType.CHECKPOINT, getDefault());
harness.streamTask.triggerCheckpointOnBarrier(checkpointMetaData, checkpointOptions, new CheckpointMetricsBuilder().setBytesProcessedDuringAlignment(0).setAlignmentDurationNanos(0));
harness.getTaskStateManager().getWaitForReportLatch().await();
assertEquals(2, harness.getTaskStateManager().getReportedCheckpointId());
// Checkpoint notification.
harness.streamTask.notifyCheckpointCompleteAsync(2);
harness.streamTask.notifyCheckpointAbortAsync(3, 2);
harness.processAll();
// Finish & close operators.
harness.processElement(Watermark.MAX_WATERMARK, 0, 0);
harness.processElement(Watermark.MAX_WATERMARK, 0, 1);
harness.processElement(Watermark.MAX_WATERMARK, 0, 2);
harness.waitForTaskCompletion();
harness.finishProcessing();
assertThat(harness.getOutput(), contains(new CheckpointBarrier(checkpointMetaData.getCheckpointId(), checkpointMetaData.getTimestamp(), checkpointOptions), Watermark.MAX_WATERMARK, new EndOfData(StopMode.DRAIN)));
}
}
use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.
the class SourceOperatorStreamTaskTest method testTriggeringStopWithSavepointWithDrain.
@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
SourceOperatorFactory<Integer> sourceOperatorFactory = new SourceOperatorFactory<>(new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 2), WatermarkStrategy.noWatermarks());
CompletableFuture<Boolean> checkpointCompleted = new CompletableFuture<>();
CheckpointResponder checkpointResponder = new TestCheckpointResponder() {
@Override
public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
checkpointCompleted.complete(null);
}
};
try (StreamTaskMailboxTestHarness<Integer> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO).setupOutputForSingletonOperatorChain(sourceOperatorFactory).setCheckpointResponder(checkpointResponder).build()) {
CompletableFuture<Boolean> triggerResult = testHarness.streamTask.triggerCheckpointAsync(new CheckpointMetaData(2, 2), CheckpointOptions.alignedNoTimeout(SavepointType.terminate(SavepointFormatType.CANONICAL), CheckpointStorageLocationReference.getDefault()));
checkpointCompleted.whenComplete((ignored, exception) -> testHarness.streamTask.notifyCheckpointCompleteAsync(2));
testHarness.waitForTaskCompletion();
testHarness.finishProcessing();
assertTrue(triggerResult.isDone());
assertTrue(triggerResult.get());
assertTrue(checkpointCompleted.isDone());
}
}
Aggregations