
Example 1 with CompletingCheckpointResponder

Use of org.apache.flink.streaming.util.CompletingCheckpointResponder in project flink by apache.

From the class StreamTaskFinalCheckpointsTest, method testWaitingForFinalCheckpointNotTheFirsNotifiedComplete.

@Test
public void testWaitingForFinalCheckpointNotTheFirsNotifiedComplete() throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder();
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, checkpointResponder, false)) {
            // complete only the third checkpoint
            checkpointResponder.completeCheckpoints(Collections.singletonList(3L));
            // finish data on all channels
            testHarness.waitForTaskCompletion();
            // trigger the first checkpoint
            CompletableFuture<Boolean> firstCheckpoint = triggerCheckpoint(testHarness, 1);
            // Notifies the result partition that all records are processed after the
            // first checkpoint is triggered.
            firstCheckpoint.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            testHarness.processAll();
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            // trigger a second checkpoint
            triggerCheckpoint(testHarness, 2L);
            testHarness.processAll();
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            // trigger the third checkpoint
            triggerCheckpoint(testHarness, 3L);
            testHarness.processAll();
            testHarness.finishProcessing();
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(3L, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertEquals(3L, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
            // Each result partition should have emitted 3 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(4, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) Test(org.junit.Test)
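The createTestHarness(partitionWriters, checkpointResponder, false) helper used above is private to StreamTaskFinalCheckpointsTest and is not included in this listing. A minimal, hedged sketch of the wiring it presumably performs, based on the builder chain that appears in Example 4 below (SourceStreamTask and ImmediatelyFinishingSource come from that example; the real helper also registers the ResultPartition writers and input channels, which this sketch omits):

// Hypothetical helper, not the actual createTestHarness from the test class:
// wire a CompletingCheckpointResponder into a mailbox test harness and route its
// completion/abort notifications back into the task.
private StreamTaskMailboxTestHarness<String> createSketchHarness(CompletingCheckpointResponder responder) throws Exception {
    StreamTaskMailboxTestHarness<String> harness =
            new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO)
                    .modifyStreamConfig(config -> config.setCheckpointingEnabled(true))
                    .setCheckpointResponder(responder)
                    .setupOutputForSingletonOperatorChain(new StreamSource<>(new ImmediatelyFinishingSource()))
                    .build();
    // Let the responder call back into the task when a checkpoint completes or aborts.
    responder.setHandlers(
            harness.streamTask::notifyCheckpointCompleteAsync,
            harness.streamTask::notifyCheckpointAbortAsync);
    return harness;
}

With such a harness, responder.completeCheckpoints(Collections.singletonList(3L)) in Example 1 means that only checkpoint 3 is ever reported back to the task as complete, which is exactly what the final assertions on getNotifiedCompletedCheckpointId() verify.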

Example 2 with CompletingCheckpointResponder

Use of org.apache.flink.streaming.util.CompletingCheckpointResponder in project flink by apache.

From the class StreamTaskFinalCheckpointsTest, method testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint.

@Test
public void testTriggerStopWithSavepointWhenWaitingForFinalCheckpoint() throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        int finalCheckpointId = 6;
        int syncSavepointId = 7;
        CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

            @Override
            public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
                if (syncSavepointId == checkpointId) {
                    // Complete the final checkpoint once the sync savepoint is acknowledged;
                    // we should wait for the sync savepoint to complete
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                    try {
                        // Give the task some time to potentially finish before the
                        // savepoint is notified complete
                        Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                    } catch (InterruptedException e) {
                        throw new FlinkRuntimeException(e);
                    }
                    super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
                }
            }
        };
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, checkpointResponder, false)) {
            // Tests triggering a checkpoint after all the inputs have received
            // EndOfPartition.
            testHarness.waitForTaskCompletion();
            // trigger the final checkpoint
            CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkpointFuture.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            // trigger the synchronous savepoint
            CompletableFuture<Boolean> savepointFuture = triggerStopWithSavepointDrain(testHarness, syncSavepointId);
            // Checkpoint 6 should be triggered successfully.
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            assertTrue(savepointFuture.isDone());
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
            // Each result partition should have emitted 2 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(3, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
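The triggerCheckpoint and triggerStopWithSavepointDrain calls above are private helpers of the test class that this listing does not include. Judging from the types aggregated below (CheckpointMetaData, CheckpointOptions, SavepointType, SavepointFormatType, CheckpointStorageLocationReference.getDefault), they presumably build the matching CheckpointOptions and hand them to StreamTask#triggerCheckpointAsync. A hedged sketch under that assumption (the checkpoint timestamps and the CANONICAL format are illustrative, not taken from the test):

// Hedged sketch of the private trigger helpers; the real implementations in
// StreamTaskFinalCheckpointsTest may differ in detail.
private static CompletableFuture<Boolean> triggerCheckpoint(
        StreamTaskMailboxTestHarness<String> harness, long checkpointId) {
    return harness.streamTask.triggerCheckpointAsync(
            new CheckpointMetaData(checkpointId, checkpointId * 100L),
            CheckpointOptions.forCheckpointWithDefaultLocation());
}

private static CompletableFuture<Boolean> triggerStopWithSavepointDrain(
        StreamTaskMailboxTestHarness<String> harness, long checkpointId) {
    // A stop-with-savepoint with drain corresponds to a terminating synchronous savepoint.
    return harness.streamTask.triggerCheckpointAsync(
            new CheckpointMetaData(checkpointId, checkpointId * 100L),
            CheckpointOptions.alignedNoTimeout(
                    SavepointType.terminate(SavepointFormatType.CANONICAL),
                    CheckpointStorageLocationReference.getDefault()));
}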

Example 3 with CompletingCheckpointResponder

Use of org.apache.flink.streaming.util.CompletingCheckpointResponder in project flink by apache.

From the class StreamTaskFinalCheckpointsTest, method testTriggeringCheckpointWithFinishedChannels.

private void testTriggeringCheckpointWithFinishedChannels(CheckpointOptions checkpointOptions) throws Exception {
    ResultPartition[] partitionWriters = new ResultPartition[2];
    try {
        for (int i = 0; i < partitionWriters.length; ++i) {
            partitionWriters[i] = PartitionTestUtils.createPartition(ResultPartitionType.PIPELINED_BOUNDED);
            partitionWriters[i].setup();
        }
        try (StreamTaskMailboxTestHarness<String> testHarness = createTestHarness(partitionWriters, new CompletingCheckpointResponder(), checkpointOptions.isUnalignedCheckpoint() || checkpointOptions.isTimeoutable())) {
            int numChannels = testHarness.inputGates[0].getInputGate().getNumberOfInputChannels();
            int[] resumedCount = new int[numChannels];
            for (int i = 0; i < numChannels; ++i) {
                TestInputChannel inputChannel = (TestInputChannel) testHarness.inputGates[0].getInputGate().getChannel(i);
                inputChannel.setActionOnResumed(() -> resumedCount[inputChannel.getChannelIndex()]++);
            }
            // Tests triggering a checkpoint when all the inputs are alive.
            CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, 2, checkpointOptions);
            processMailTillCheckpointSucceeds(testHarness, checkpointFuture);
            assertEquals(2, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Tests triggering a checkpoint after some inputs have received EndOfPartition.
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 0);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 0);
            checkpointFuture = triggerCheckpoint(testHarness, 4, checkpointOptions);
            processMailTillCheckpointSucceeds(testHarness, checkpointFuture);
            assertEquals(4, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Tests triggering a checkpoint after all the inputs have received
            // EndOfPartition.
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 1);
            testHarness.processEvent(new EndOfData(StopMode.DRAIN), 0, 2);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 1);
            testHarness.processEvent(EndOfPartitionEvent.INSTANCE, 0, 2);
            checkpointFuture = triggerCheckpoint(testHarness, 6, checkpointOptions);
            // Notifies the result partition that all records are processed after the
            // last checkpoint is triggered.
            checkpointFuture.thenAccept((ignored) -> {
                for (ResultPartition resultPartition : partitionWriters) {
                    resultPartition.onSubpartitionAllDataProcessed(0);
                }
            });
            // Checkpoint 6 should be triggered successfully.
            testHarness.finishProcessing();
            assertTrue(checkpointFuture.isDone());
            testHarness.getTaskStateManager().getWaitForReportLatch().await();
            assertEquals(6, testHarness.getTaskStateManager().getReportedCheckpointId());
            assertArrayEquals(new int[] { 0, 0, 0 }, resumedCount);
            // Each result partition should have emitted 3 barriers and 1 EndOfUserRecordsEvent.
            for (ResultPartition resultPartition : partitionWriters) {
                assertEquals(4, resultPartition.getNumberOfQueuedBuffers());
            }
        }
    } finally {
        for (ResultPartitionWriter writer : partitionWriters) {
            if (writer != null) {
                writer.close();
            }
        }
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition)
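Like the trigger helpers, processMailTillCheckpointSucceeds is private to the test class and not shown here. Given the calls that do appear in the examples (processAll, getWaitForReportLatch), it presumably pumps the mailbox until the checkpoint future completes and then waits for the state report; a minimal sketch under that assumption:

// Hedged sketch, not the actual helper: drive the mailbox until the triggered
// checkpoint has been performed, then wait for it to be reported to the responder.
private static void processMailTillCheckpointSucceeds(
        StreamTaskMailboxTestHarness<String> harness,
        CompletableFuture<Boolean> checkpointFuture) throws Exception {
    while (!checkpointFuture.isDone()) {
        harness.processAll();
    }
    harness.getTaskStateManager().getWaitForReportLatch().await();
}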

Example 4 with CompletingCheckpointResponder

Use of org.apache.flink.streaming.util.CompletingCheckpointResponder in project flink by apache.

From the class StreamTaskFinalCheckpointsTest, method doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask.

private void doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask(boolean drain) throws Exception {
    int finalCheckpointId = 6;
    int syncSavepointId = 7;
    CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (syncSavepointId == checkpointId) {
                // Complete the final checkpoint once the sync savepoint is acknowledged;
                // we should wait for the sync savepoint to complete
                super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                try {
                    // Give the task some time to potentially finish before the
                    // savepoint is notified complete
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    throw new FlinkRuntimeException(e);
                }
                super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO).modifyStreamConfig(config -> {
        config.setCheckpointingEnabled(true);
    }).setCheckpointResponder(checkpointResponder).setupOutputForSingletonOperatorChain(new StreamSource<>(new ImmediatelyFinishingSource())).build()) {
        checkpointResponder.setHandlers(testHarness.streamTask::notifyCheckpointCompleteAsync, testHarness.streamTask::notifyCheckpointAbortAsync);
        // Tests triggering a checkpoint after all the inputs have received
        // EndOfPartition.
        // start task thread
        testHarness.streamTask.runMailboxLoop();
        // trigger the final checkpoint
        CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
        // trigger the synchronous savepoint
        CompletableFuture<Boolean> savepointFuture = drain ? triggerStopWithSavepointDrain(testHarness, syncSavepointId) : triggerStopWithSavepointNoDrain(testHarness, syncSavepointId);
        // Checkpoint 6 should be triggered successfully.
        testHarness.finishProcessing();
        assertTrue(checkpointFuture.isDone());
        assertTrue(savepointFuture.isDone());
        testHarness.getTaskStateManager().getWaitForReportLatch().await();
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) JobID(org.apache.flink.api.common.JobID)
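ImmediatelyFinishingSource, referenced in the builder chain above, is a test-local class that this listing omits. It is presumably a SourceFunction whose run() returns right away, so the source task finishes as soon as the mailbox loop starts; a hedged sketch:

// Hedged sketch of the test-local source: emit nothing and return immediately,
// letting the SourceStreamTask reach the finished state at once.
private static class ImmediatelyFinishingSource implements SourceFunction<String> {

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        // return without emitting any records
    }

    @Override
    public void cancel() {}
}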

Example 5 with CompletingCheckpointResponder

Use of org.apache.flink.streaming.util.CompletingCheckpointResponder in project flink by apache.

From the class StreamTaskFinalCheckpointsTest, method testWaitingForPendingCheckpointsOnFinished.

@Test
public void testWaitingForPendingCheckpointsOnFinished() throws Exception {
    long delayedCheckpointId = 2;
    CompletingCheckpointResponder responder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (delayedCheckpointId == checkpointId) {
                try {
                    // Give the task some time to potentially finish before the
                    // checkpoint is acknowledged; also do not notify its completion
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    throw new FlinkRuntimeException(e);
                }
            } else {
                super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = createTestHarness(responder)) {
        // finish all data
        harness.waitForTaskCompletion();
        // trigger the final checkpoint
        harness.streamTask.triggerCheckpointOnBarrier(
                new CheckpointMetaData(1, 101),
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                new CheckpointMetricsBuilder()
                        .setBytesProcessedDuringAlignment(0L)
                        .setAlignmentDurationNanos(0L));
        // trigger another checkpoint that we want to complete before finishing the task
        harness.streamTask.triggerCheckpointOnBarrier(
                new CheckpointMetaData(delayedCheckpointId, 101),
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                new CheckpointMetricsBuilder()
                        .setBytesProcessedDuringAlignment(0L)
                        .setAlignmentDurationNanos(0L));
        harness.processAll();
        harness.finishProcessing();
        assertEquals(delayedCheckpointId, harness.getTaskStateManager().getReportedCheckpointId());
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
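CONCURRENT_EVENT_WAIT_PERIOD_MS is a constant of the test class whose value is not shown in this listing; it only needs to be a short pause that gives the task a chance to finish while an acknowledgement is held back. A hypothetical definition:

// Hypothetical value; the actual constant in StreamTaskFinalCheckpointsTest is not shown here.
private static final long CONCURRENT_EVENT_WAIT_PERIOD_MS = 200;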

Aggregations

CompletingCheckpointResponder (org.apache.flink.streaming.util.CompletingCheckpointResponder): 9
ResultPartitionWriter (org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter): 8
ResultPartition (org.apache.flink.runtime.io.network.partition.ResultPartition): 8
Test (org.junit.Test): 7
EndOfData (org.apache.flink.runtime.io.network.api.EndOfData): 6
PipelinedResultPartition (org.apache.flink.runtime.io.network.partition.PipelinedResultPartition): 6
JobID (org.apache.flink.api.common.JobID): 5
CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics): 5
TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot): 5
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 5
CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData): 4
Duration (java.time.Duration): 3
Collections (java.util.Collections): 3
CompletableFuture (java.util.concurrent.CompletableFuture): 3
BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo): 3
StringSerializer (org.apache.flink.api.common.typeutils.base.StringSerializer): 3
SavepointFormatType (org.apache.flink.core.execution.SavepointFormatType): 3
CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions): 3
CheckpointType (org.apache.flink.runtime.checkpoint.CheckpointType): 3
SavepointType (org.apache.flink.runtime.checkpoint.SavepointType): 3