Search in sources :

Example 21 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

In class StreamTaskFinalCheckpointsTest, method testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint:

/**
 * Triggers a stop-with-savepoint (drain) while the task is still waiting for its final
 * checkpoint, and checks that both the final checkpoint and the savepoint finish.
 *
 * <p>The responder used here does nothing when the final checkpoint itself is acknowledged.
 * Only once the synchronous savepoint is acknowledged does it forward the acknowledgement of
 * the final checkpoint, wait for {@code CONCURRENT_EVENT_WAIT_PERIOD_MS}, and then forward the
 * acknowledgement of the savepoint.
 */
@Test
public void testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint() throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        int finalCheckpointId = 6;
        int syncSavepointId = 7;
        CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

            @Override
            public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
                // Acknowledgements for any other checkpoint id (including the final
                // checkpoint) are intentionally dropped here.
                if (syncSavepointId == checkpointId) {
                    // complete the final checkpoint when sync savepoint acknowledged
                    // we should wait for the sync savepoint to complete
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                    try {
                        // Give some potential time for the task to finish before the
                        // savepoint is notified complete
                        Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                    } catch (InterruptedException e) {
                        // Restore the interrupt status so that callers further up the
                        // stack can still observe the interruption.
                        Thread.currentThread().interrupt();
                        throw new FlinkRuntimeException(e);
                    }
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
                }
            }
        };
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, checkpointResponder, false)) {
            // Tests triggering a checkpoint after all the inputs have received
            // EndOfPartition.
            testHarness.waitForTaskCompletion();
            // trigger the final checkpoint
            CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkpointFuture.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            // trigger the synchronous savepoint
            CompletableFuture<Boolean> savepointFuture = triggerStopWithSavepointDrain(testHarness, syncSavepointId);
            // The checkpoint 6 would be triggered successfully.
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            assertTrue(savepointFuture.isDone());
            // Block until the task state manager has received a report.
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
            // Each result partition should have emitted 2 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(3, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        // Entries may still be null if partition creation failed part-way through.
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 22 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

In class StreamTaskFinalCheckpointsTest, method doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask:

/**
 * Shared body for the source-task variants of the "stop-with-savepoint while waiting for the
 * final checkpoint" scenario.
 *
 * <p>The responder used here does nothing when the final checkpoint itself is acknowledged.
 * Only once the synchronous savepoint is acknowledged does it forward the acknowledgement of
 * the final checkpoint, wait for {@code CONCURRENT_EVENT_WAIT_PERIOD_MS}, and then forward the
 * acknowledgement of the savepoint.
 *
 * @param drain if {@code true} the savepoint is triggered via {@code
 *     triggerStopWithSavepointDrain}, otherwise via {@code triggerStopWithSavepointNoDrain}
 * @throws Exception if building or driving the test harness fails
 */
private void doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask(boolean drain) throws Exception {
    int finalCheckpointId = 6;
    int syncSavepointId = 7;
    CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            // Acknowledgements for any other checkpoint id (including the final
            // checkpoint) are intentionally dropped here.
            if (syncSavepointId == checkpointId) {
                // complete the final checkpoint when sync savepoint acknowledged
                // we should wait for the sync savepoint to complete
                super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                try {
                    // Give some potential time for the task to finish before the
                    // savepoint is notified complete
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    // Restore the interrupt status so that callers further up the
                    // stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                    throw new FlinkRuntimeException(e);
                }
                super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO).modifyStreamConfig(config -> {
        config.setCheckpointingEnabled(true);
    }).setCheckpointResponder(checkpointResponder).setupOutputForSingletonOperatorChain(new StreamSource<>(new ImmediatelyFinishingSource())).build()) {
        // Route completion / abort notifications from the responder back into the task.
        checkpointResponder.setHandlers(testHarness.streamTask::notifyCheckpointCompleteAsync, testHarness.streamTask::notifyCheckpointAbortAsync);
        // Tests triggering a checkpoint after the source has finished.
        // Run the task's mailbox loop before triggering anything.
        testHarness.streamTask.runMailboxLoop();
        // trigger the final checkpoint
        CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
        // trigger the synchronous savepoint
        CompletableFuture<Boolean> savepointFuture = drain ? triggerStopWithSavepointDrain(testHarness, syncSavepointId) : triggerStopWithSavepointNoDrain(testHarness, syncSavepointId);
        // The checkpoint 6 would be triggered successfully.
        testHarness.finishProcessing();
        assertTrue(checkpointFuture.isDone());
        assertTrue(savepointFuture.isDone());
        // Block until the task state manager has received a report.
        testHarness.getTaskStateManager().getWaitForReportLatch().await();
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) Deadline(org.apache.flink.api.common.time.Deadline) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ListState(org.apache.flink.api.common.state.ListState) Future(java.util.concurrent.Future) CheckpointStorageLocationReference.getDefault(org.apache.flink.runtime.state.CheckpointStorageLocationReference.getDefault) Duration(java.time.Duration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) Collectors(java.util.stream.Collectors) StopMode(org.apache.flink.runtime.io.network.api.StopMode) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) 
CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) STRING_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Nullable(javax.annotation.Nullable) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) 
StreamSource(org.apache.flink.streaming.api.operators.StreamSource) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) JobID(org.apache.flink.api.common.JobID)

Example 23 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

In class SourceOperatorStreamTaskTest, method testTriggeringStopWithSavepointWithDrain:

/**
 * Triggers a terminating savepoint (id 2, canonical format) on a source operator task backed
 * by an unbounded {@code MockSource} and checks that the trigger future completes with {@code
 * true} and that the savepoint was acknowledged.
 *
 * <p>Once the responder sees the acknowledgement, the task is notified that checkpoint 2 is
 * complete; the test then waits for the task to finish.
 */
@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    SourceOperatorFactory<Integer> operatorFactory =
            new SourceOperatorFactory<>(
                    new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 2),
                    WatermarkStrategy.noWatermarks());
    // Completed by the responder below as soon as any checkpoint is acknowledged.
    CompletableFuture<Boolean> acknowledged = new CompletableFuture<>();
    CheckpointResponder ackTrackingResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            acknowledged.complete(null);
        }
    };
    try (StreamTaskMailboxTestHarness<Integer> harness =
            new StreamTaskMailboxTestHarnessBuilder<>(SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    .setupOutputForSingletonOperatorChain(operatorFactory)
                    .setCheckpointResponder(ackTrackingResponder)
                    .build()) {
        CompletableFuture<Boolean> triggerFuture =
                harness.streamTask.triggerCheckpointAsync(
                        new CheckpointMetaData(2, 2),
                        CheckpointOptions.alignedNoTimeout(
                                SavepointType.terminate(SavepointFormatType.CANONICAL),
                                CheckpointStorageLocationReference.getDefault()));
        // Tell the task the savepoint is complete once it has been acknowledged.
        acknowledged.whenComplete(
                (unusedResult, unusedFailure) -> harness.streamTask.notifyCheckpointCompleteAsync(2));
        harness.waitForTaskCompletion();
        harness.finishProcessing();
        assertTrue(triggerFuture.isDone());
        assertTrue(triggerFuture.get());
        assertTrue(acknowledged.isDone());
    }
}
Also used : MockSource(org.apache.flink.api.connector.source.mocks.MockSource) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 24 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

In class StreamTaskFinalCheckpointsTest, method testWaitingForPendingCheckpointsOnFinished:

/**
 * Checks that a checkpoint triggered after the final checkpoint is still reported before the
 * task finishes processing.
 *
 * <p>The responder delays the acknowledgement path of checkpoint {@code delayedCheckpointId}
 * by {@code CONCURRENT_EVENT_WAIT_PERIOD_MS} and never forwards it to the parent responder;
 * all other checkpoints are forwarded unchanged.
 */
@Test
public void testWaitingForPendingCheckpointsOnFinished() throws Exception {
    long delayedCheckpointId = 2;
    CompletingCheckpointResponder responder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (delayedCheckpointId == checkpointId) {
                try {
                    // Give some potential time for the task to finish before the
                    // checkpoint is acknowledged, also do not notify its completion
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    // Restore the interrupt status so that callers further up the
                    // stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                    throw new FlinkRuntimeException(e);
                }
            } else {
                super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = createTestHarness(responder)) {
        // finish all data
        harness.waitForTaskCompletion();
        // trigger the final checkpoint
        harness.streamTask.triggerCheckpointOnBarrier(new CheckpointMetaData(1, 101), CheckpointOptions.forCheckpointWithDefaultLocation(), new CheckpointMetricsBuilder().setBytesProcessedDuringAlignment(0L).setAlignmentDurationNanos(0L));
        // trigger another checkpoint that we want to complete before finishing the task
        harness.streamTask.triggerCheckpointOnBarrier(new CheckpointMetaData(delayedCheckpointId, 101), CheckpointOptions.forCheckpointWithDefaultLocation(), new CheckpointMetricsBuilder().setBytesProcessedDuringAlignment(0L).setAlignmentDurationNanos(0L));
        harness.processAll();
        harness.finishProcessing();
        // The delayed checkpoint must have been reported to the task state manager.
        assertEquals(delayedCheckpointId, harness.getTaskStateManager().getReportedCheckpointId());
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 25 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

In class ExecutionTest, method testTaskRestoreStateIsNulledAfterDeployment:

/**
 * Verifies that an {@link Execution} drops its task restore state once scheduling has been
 * started: the restore state is non-null right after {@code setInitialState} and null after
 * {@code startScheduling()}. See FLINK-9693.
 */
@Test
public void testTaskRestoreStateIsNulledAfterDeployment() throws Exception {
    final JobVertex noOpVertex = createNoOpJobVertex();
    final SchedulerBase schedulerUnderTest =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.streamingJobGraph(noOpVertex),
                            ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setExecutionSlotAllocatorFactory(
                            SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                                    TestingPhysicalSlotProvider.createWithLimitedAmountOfPhysicalSlots(1)))
                    .build();

    // Current attempt of the first subtask of the job vertex.
    final Execution currentAttempt =
            schedulerUnderTest
                    .getExecutionJobVertex(noOpVertex.getID())
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt();

    currentAttempt.setInitialState(new JobManagerTaskRestore(1L, new TaskStateSnapshot()));
    assertThat(currentAttempt.getTaskRestore(), is(notNullValue()));

    // schedule the execution vertex and wait for its deployment
    schedulerUnderTest.startScheduling();
    assertThat(currentAttempt.getTaskRestore(), is(nullValue()));
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Aggregations

TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)42 Test (org.junit.Test)28 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)19 JobID (org.apache.flink.api.common.JobID)17 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)16 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)13 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)13 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)13 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder)9 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)8 IOException (java.io.IOException)7 HashMap (java.util.HashMap)6 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)6 CompletableFuture (java.util.concurrent.CompletableFuture)5 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)5 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)5 InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4