Search in sources :

Example 16 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseBeforeAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Verifies that cancelling the task concurrently with a checkpoint that has not been
 * acknowledged yet discards the checkpoint's state handles and keeps the acknowledge message
 * from reaching the CheckpointCoordinator. This interleaving is only possible when the cancel
 * call runs before Environment.acknowledgeCheckpoint().
 */
@Test
public void testAsyncCheckpointingConcurrentCloseBeforeAcknowledge() throws Exception {
    final long checkpointId = 42L;

    // The four state handles whose discard futures are awaited after the cancel.
    final TestingKeyedStateHandle keyedManaged = new TestingKeyedStateHandle();
    final TestingKeyedStateHandle keyedRaw = new TestingKeyedStateHandle();
    final TestingOperatorStateHandle operatorManaged = new TestingOperatorStateHandle();
    final TestingOperatorStateHandle operatorRaw = new TestingOperatorStateHandle();

    // Only the raw keyed state goes through a blocking future; everything else is already done.
    final BlockingRunnableFuture<SnapshotResult<KeyedStateHandle>> blockingKeyedRawFuture =
            new BlockingRunnableFuture<>(2, SnapshotResult.of(keyedRaw));
    final OperatorSnapshotFutures snapshotFutures =
            new OperatorSnapshotFutures(
                    DoneFuture.of(SnapshotResult.of(keyedManaged)),
                    blockingKeyedRawFuture,
                    DoneFuture.of(SnapshotResult.of(operatorManaged)),
                    DoneFuture.of(SnapshotResult.of(operatorRaw)),
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()));

    final OneInputStreamOperator<String, String> operator =
            streamOperatorWithSnapshot(snapshotFutures);
    final AcknowledgeDummyEnvironment environment = new AcknowledgeDummyEnvironment();

    final RunningTask<MockStreamTask> runningTask =
            runTask(() -> createMockStreamTask(environment, operatorChain(operator)));
    waitTaskIsRunning(runningTask.streamTask, runningTask.invocationFuture);

    runningTask.streamTask.triggerCheckpointAsync(
            new CheckpointMetaData(checkpointId, 1L),
            CheckpointOptions.forCheckpointWithDefaultLocation());

    // Cancel only after the blocking future has started running.
    blockingKeyedRawFuture.awaitRun();
    runningTask.streamTask.cancel();

    // Every state handle must have been discarded before we look at the acknowledgement.
    FutureUtils.waitForAll(
                    asList(
                            keyedManaged.getDiscardFuture(),
                            keyedRaw.getDiscardFuture(),
                            operatorManaged.getDiscardFuture(),
                            operatorRaw.getDiscardFuture()))
            .get();

    try {
        environment.getAcknowledgeCheckpointFuture().get(10L, TimeUnit.MILLISECONDS);
        fail("The checkpoint should not get acknowledged.");
    } catch (TimeoutException expected) {
        // Timing out is the desired outcome: the acknowledge future must stay incomplete.
    }

    runningTask.waitForTaskCompletion(true);
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 17 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class StreamTaskTest method testFailingAsyncCheckpointRunnable.

/**
 * Verifies that when the AsyncCheckpointRunnable fails, every operator snapshot result gets
 * cancelled and all non partitioned state handles are discarded.
 */
@Test
public void testFailingAsyncCheckpointRunnable() throws Exception {
    // Mocked snapshot results, one per operator in the chain.
    final OperatorSnapshotFutures firstSnapshot = mock(OperatorSnapshotFutures.class);
    final OperatorSnapshotFutures secondSnapshot = mock(OperatorSnapshotFutures.class);
    final OperatorSnapshotFutures thirdSnapshot = mock(OperatorSnapshotFutures.class);

    // The third operator's raw operator state future throws when its result is requested.
    final RunnableFuture<SnapshotResult<OperatorStateHandle>> throwingFuture =
            mock(RunnableFuture.class);
    when(throwingFuture.get())
            .thenThrow(new ExecutionException(new Exception("Test exception")));
    when(thirdSnapshot.getOperatorStateRawFuture()).thenReturn(throwingFuture);

    try (MockEnvironment environment = new MockEnvironmentBuilder().build()) {
        final RunningTask<MockStreamTask> runningTask =
                runTask(
                        () ->
                                createMockStreamTask(
                                        environment,
                                        operatorChain(
                                                streamOperatorWithSnapshot(firstSnapshot),
                                                streamOperatorWithSnapshot(secondSnapshot),
                                                streamOperatorWithSnapshot(thirdSnapshot))));
        waitTaskIsRunning(runningTask.streamTask, runningTask.invocationFuture);

        environment.setExpectedExternalFailureCause(Throwable.class);
        runningTask
                .streamTask
                .triggerCheckpointAsync(
                        new CheckpointMetaData(42L, 1L),
                        CheckpointOptions.forCheckpointWithDefaultLocation())
                .get();

        // Shut the async operations pool down and wait for the async part to finish.
        final ExecutorService asyncPool = runningTask.streamTask.getAsyncOperationsThreadPool();
        asyncPool.shutdown();
        final boolean terminated = asyncPool.awaitTermination(10000L, TimeUnit.MILLISECONDS);
        if (!terminated) {
            fail(
                    "Executor did not shut down within the given timeout. This indicates that the "
                            + "checkpointing did not resume.");
        }

        assertTrue(environment.getActualExternalFailureCause().isPresent());

        verify(firstSnapshot).cancel();
        verify(secondSnapshot).cancel();
        verify(thirdSnapshot).cancel();

        runningTask.streamTask.finishInput();
        runningTask.waitForTaskCompletion(false);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) ExecutorService(java.util.concurrent.ExecutorService) ExecutionException(java.util.concurrent.ExecutionException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FunctionWithException(org.apache.flink.util.function.FunctionWithException) AsynchronousException(org.apache.flink.runtime.taskmanager.AsynchronousException) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TimeoutException(java.util.concurrent.TimeoutException) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) SupplierWithException(org.apache.flink.util.function.SupplierWithException) BiConsumerWithException(org.apache.flink.util.function.BiConsumerWithException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Example 18 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class RestoreStreamTaskTest method triggerCheckpoint.

/**
 * Triggers checkpoint 1 on the given task, waits on the harness's report latch and asserts
 * that the checkpoint id reported to the task state manager is the one that was triggered.
 *
 * @param testHarness harness whose task state manager supplies the latch and reported id
 * @param streamTask task on which the checkpoint is triggered
 * @throws Exception if waiting on the latch fails
 */
private void triggerCheckpoint(OneInputStreamTaskTestHarness<String, String> testHarness, OneInputStreamTask<String, String> streamTask) throws Exception {
    final long expectedCheckpointId = 1L;

    streamTask.triggerCheckpointAsync(
            new CheckpointMetaData(expectedCheckpointId, 1L),
            CheckpointOptions.forCheckpointWithDefaultLocation());

    // Wait on the report latch before reading the reported checkpoint id.
    testHarness.taskStateManager.getWaitForReportLatch().await();

    final long actualCheckpointId = testHarness.taskStateManager.getReportedCheckpointId();
    assertEquals(expectedCheckpointId, actualCheckpointId);
}
Also used : CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData)

Example 19 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testOperatorSkipLifeCycleIfFinishedOnRestore.

/**
 * Runs a two-operator chain restored from {@code TaskStateSnapshot.FINISHED_ON_RESTORE},
 * triggers a checkpoint on barrier, delivers checkpoint complete/abort notifications, feeds
 * {@code Watermark.MAX_WATERMARK} on each of the three input channels, and finally checks that
 * the output is exactly the checkpoint barrier, the max watermark and a draining
 * {@code EndOfData}.
 */
@Test
public void testOperatorSkipLifeCycleIfFinishedOnRestore() throws Exception {
    final long checkpointId = 2L;
    final int channelCount = 3;

    try (StreamTaskMailboxTestHarness<String> testHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO)
                    .addInput(BasicTypeInfo.STRING_TYPE_INFO, channelCount)
                    .setCollectNetworkEvents()
                    .setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE)
                    .setupOperatorChain(new TestFinishedOnRestoreStreamOperator())
                    .chain(new TestFinishedOnRestoreStreamOperator(), StringSerializer.INSTANCE)
                    .finish()
                    .build()) {
        // Complete the restore phase (state initialization and open).
        testHarness.processAll();

        // Attempt to trigger a checkpoint via a barrier.
        testHarness.getTaskStateManager().getWaitForReportLatch().reset();
        final CheckpointMetaData metaData = new CheckpointMetaData(checkpointId, 2);
        final CheckpointOptions options =
                new CheckpointOptions(CheckpointType.CHECKPOINT, getDefault());
        testHarness.streamTask.triggerCheckpointOnBarrier(
                metaData,
                options,
                new CheckpointMetricsBuilder()
                        .setBytesProcessedDuringAlignment(0)
                        .setAlignmentDurationNanos(0));
        testHarness.getTaskStateManager().getWaitForReportLatch().await();
        assertEquals(checkpointId, testHarness.getTaskStateManager().getReportedCheckpointId());

        // Deliver the checkpoint notifications.
        testHarness.streamTask.notifyCheckpointCompleteAsync(checkpointId);
        testHarness.streamTask.notifyCheckpointAbortAsync(3, checkpointId);
        testHarness.processAll();

        // Finish and close the operators: one max watermark per input channel.
        for (int channel = 0; channel < channelCount; channel++) {
            testHarness.processElement(Watermark.MAX_WATERMARK, 0, channel);
        }
        testHarness.waitForTaskCompletion();
        testHarness.finishProcessing();

        assertThat(
                testHarness.getOutput(),
                contains(
                        new CheckpointBarrier(
                                metaData.getCheckpointId(), metaData.getTimestamp(), options),
                        Watermark.MAX_WATERMARK,
                        new EndOfData(StopMode.DRAIN)));
    }
}
Also used : CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Test(org.junit.Test)

Example 20 with CheckpointMetaData

use of org.apache.flink.runtime.checkpoint.CheckpointMetaData in project flink by apache.

the class SourceOperatorStreamTaskTest method testTriggeringStopWithSavepointWithDrain.

/**
 * Triggers a terminating savepoint (canonical format) on a source operator task. Once the
 * checkpoint responder sees the acknowledgement, the task is notified that checkpoint 2
 * completed. The test then waits for the task to complete and asserts that the trigger future
 * finished with {@code true} and that the acknowledgement was observed.
 */
@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    final long checkpointId = 2L;

    final SourceOperatorFactory<Integer> sourceFactory =
            new SourceOperatorFactory<>(
                    new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 2),
                    WatermarkStrategy.noWatermarks());

    // Completed by the responder below as soon as the checkpoint is acknowledged.
    final CompletableFuture<Boolean> acknowledged = new CompletableFuture<>();
    final CheckpointResponder responder =
            new TestCheckpointResponder() {

                @Override
                public void acknowledgeCheckpoint(
                        JobID jobID,
                        ExecutionAttemptID executionAttemptID,
                        long checkpointId,
                        CheckpointMetrics checkpointMetrics,
                        TaskStateSnapshot subtaskState) {
                    super.acknowledgeCheckpoint(
                            jobID,
                            executionAttemptID,
                            checkpointId,
                            checkpointMetrics,
                            subtaskState);
                    acknowledged.complete(null);
                }
            };

    try (StreamTaskMailboxTestHarness<Integer> harness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    .setupOutputForSingletonOperatorChain(sourceFactory)
                    .setCheckpointResponder(responder)
                    .build()) {
        final CheckpointMetaData metaData = new CheckpointMetaData(checkpointId, 2);
        final CheckpointOptions savepointOptions =
                CheckpointOptions.alignedNoTimeout(
                        SavepointType.terminate(SavepointFormatType.CANONICAL),
                        CheckpointStorageLocationReference.getDefault());
        final CompletableFuture<Boolean> triggerFuture =
                harness.streamTask.triggerCheckpointAsync(metaData, savepointOptions);

        // After the acknowledgement, tell the task that the checkpoint completed.
        acknowledged.whenComplete(
                (ignored, exception) ->
                        harness.streamTask.notifyCheckpointCompleteAsync(checkpointId));

        harness.waitForTaskCompletion();
        harness.finishProcessing();

        assertTrue(triggerFuture.isDone());
        assertTrue(triggerFuture.get());
        assertTrue(acknowledged.isDone());
    }
}
Also used : MockSource(org.apache.flink.api.connector.source.mocks.MockSource) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)47 Test (org.junit.Test)33 CheckpointMetricsBuilder (org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder)16 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)15 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)13 IOException (java.io.IOException)12 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)11 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)11 OperatorSnapshotFutures (org.apache.flink.streaming.api.operators.OperatorSnapshotFutures)11 JobID (org.apache.flink.api.common.JobID)10 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)10 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)10 ExecutionException (java.util.concurrent.ExecutionException)9 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)9 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)8 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)8 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)7 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)7 CompletableFuture (java.util.concurrent.CompletableFuture)6