
Example 31 with FlinkRuntimeException

use of org.apache.flink.util.FlinkRuntimeException in project flink by apache.

the class StreamTaskFinalCheckpointsTest method doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask.

private void doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask(boolean drain) throws Exception {
    int finalCheckpointId = 6;
    int syncSavepointId = 7;
    CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (syncSavepointId == checkpointId) {
                // Only complete the final checkpoint once the sync savepoint has been
                // acknowledged, which forces the task to wait for the sync savepoint.
                super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                try {
                    // Give the task some time to (potentially) finish before the
                    // savepoint completion is notified
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    throw new FlinkRuntimeException(e);
                }
                super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> testHarness =
            new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO)
                    .modifyStreamConfig(config -> config.setCheckpointingEnabled(true))
                    .setCheckpointResponder(checkpointResponder)
                    .setupOutputForSingletonOperatorChain(
                            new StreamSource<>(new ImmediatelyFinishingSource()))
                    .build()) {
        checkpointResponder.setHandlers(testHarness.streamTask::notifyCheckpointCompleteAsync, testHarness.streamTask::notifyCheckpointAbortAsync);
        // Tests triggering a checkpoint after all the inputs have received EndOfPartition.
        // start task thread
        testHarness.streamTask.runMailboxLoop();
        // trigger the final checkpoint
        CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
        // trigger the synchronous savepoint
        CompletableFuture<Boolean> savepointFuture = drain ? triggerStopWithSavepointDrain(testHarness, syncSavepointId) : triggerStopWithSavepointNoDrain(testHarness, syncSavepointId);
        // Checkpoint 6 should be triggered successfully.
        testHarness.finishProcessing();
        assertTrue(checkpointFuture.isDone());
        assertTrue(savepointFuture.isDone());
        testHarness.getTaskStateManager().getWaitForReportLatch().await();
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) Deadline(org.apache.flink.api.common.time.Deadline) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) ListState(org.apache.flink.api.common.state.ListState) Future(java.util.concurrent.Future) CheckpointStorageLocationReference.getDefault(org.apache.flink.runtime.state.CheckpointStorageLocationReference.getDefault) Duration(java.time.Duration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) TestInputChannel(org.apache.flink.runtime.io.network.partition.consumer.TestInputChannel) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) Collectors(java.util.stream.Collectors) StopMode(org.apache.flink.runtime.io.network.api.StopMode) PipelinedResultPartition(org.apache.flink.runtime.io.network.partition.PipelinedResultPartition) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) STRING_TYPE_INFO(org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Nullable(javax.annotation.Nullable) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) 
Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext)

Example 32 with FlinkRuntimeException

use of org.apache.flink.util.FlinkRuntimeException in project flink by apache.

the class StreamTaskFinalCheckpointsTest method testWaitingForPendingCheckpointsOnFinished.

@Test
public void testWaitingForPendingCheckpointsOnFinished() throws Exception {
    long delayedCheckpointId = 2;
    CompletingCheckpointResponder responder = new CompletingCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (delayedCheckpointId == checkpointId) {
                try {
                    // Give the task some time to (potentially) finish before the checkpoint
                    // is acknowledged; also do not notify its completion
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    throw new FlinkRuntimeException(e);
                }
            } else {
                super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = createTestHarness(responder)) {
        // finish all data
        harness.waitForTaskCompletion();
        // trigger the final checkpoint
        harness.streamTask.triggerCheckpointOnBarrier(
                new CheckpointMetaData(1, 101),
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                new CheckpointMetricsBuilder()
                        .setBytesProcessedDuringAlignment(0L)
                        .setAlignmentDurationNanos(0L));
        // trigger another checkpoint that we want to complete before finishing the task
        harness.streamTask.triggerCheckpointOnBarrier(
                new CheckpointMetaData(delayedCheckpointId, 101),
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                new CheckpointMetricsBuilder()
                        .setBytesProcessedDuringAlignment(0L)
                        .setAlignmentDurationNanos(0L));
        harness.processAll();
        harness.finishProcessing();
        assertEquals(delayedCheckpointId, harness.getTaskStateManager().getReportedCheckpointId());
    }
}
Also used : CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointMetricsBuilder(org.apache.flink.runtime.checkpoint.CheckpointMetricsBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
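
Examples 31 and 32 use FlinkRuntimeException for the same reason: the overridden acknowledgeCheckpoint callback declares no checked exceptions, so the InterruptedException from Thread.sleep has to be rethrown unchecked. A minimal sketch of that pattern in isolation follows; the class name, method name, and wait constant are illustrative (not from the Flink tests), and restoring the interrupt flag is an extra hygiene step that the tests above omit.

import org.apache.flink.util.FlinkRuntimeException;

/** Illustrative helper showing the wrap-and-rethrow pattern from the tests above. */
final class InterruptibleWaits {

    // Hypothetical wait period, standing in for CONCURRENT_EVENT_WAIT_PERIOD_MS.
    private static final long WAIT_PERIOD_MS = 500L;

    private InterruptibleWaits() {}

    /** Sleeps inside a callback that cannot declare checked exceptions. */
    static void sleepOrRethrow() {
        try {
            Thread.sleep(WAIT_PERIOD_MS);
        } catch (InterruptedException e) {
            // Restore the interrupt status before converting to an unchecked exception.
            Thread.currentThread().interrupt();
            throw new FlinkRuntimeException(e);
        }
    }
}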

Example 33 with FlinkRuntimeException

use of org.apache.flink.util.FlinkRuntimeException in project flink by apache.

the class CheckpointFailureManagerITCase method testAsyncCheckpointFailureTriggerJobFailed.

@Test(timeout = 20_000)
public void testAsyncCheckpointFailureTriggerJobFailed() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new AsyncFailureStateBackend());
    env.addSource(new StringGeneratingSourceFunction()).addSink(new DiscardingSink<>());
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    try {
        // Assert that the job executes the checkpoint only once and fails only once.
        TestUtils.submitJobAndWaitForResult(cluster.getClusterClient(), jobGraph, getClass().getClassLoader());
    } catch (JobExecutionException jobException) {
        Optional<FlinkRuntimeException> throwable = ExceptionUtils.findThrowable(jobException, FlinkRuntimeException.class);
        Assert.assertTrue(throwable.isPresent());
        Assert.assertEquals(CheckpointFailureManager.EXCEEDED_CHECKPOINT_TOLERABLE_FAILURE_MESSAGE, throwable.get().getMessage());
    }
    // Assert that the job failed only once.
    Assert.assertEquals(1, StringGeneratingSourceFunction.INITIALIZE_TIMES.get());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Optional(java.util.Optional) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)
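
The useful idiom in Example 33 is on the catch side: the job failure surfaces as a JobExecutionException, and ExceptionUtils.findThrowable walks the cause chain to check whether a FlinkRuntimeException (here carrying the checkpoint-failure message) is buried inside it. A standalone sketch of that unwrapping step; the class and method names below are illustrative, only the ExceptionUtils call comes from the example.

import java.util.Optional;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkRuntimeException;

final class FailureInspection {

    private FailureInspection() {}

    /**
     * Returns the message of the first FlinkRuntimeException found in the cause chain,
     * or an empty Optional if none is present.
     */
    static Optional<String> flinkRuntimeFailureMessage(Throwable jobFailure) {
        return ExceptionUtils.findThrowable(jobFailure, FlinkRuntimeException.class)
                .map(FlinkRuntimeException::getMessage);
    }
}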

Example 34 with FlinkRuntimeException

use of org.apache.flink.util.FlinkRuntimeException in project flink by apache.

the class KubernetesHighAvailabilityRecoverFromSavepointITCase method getConfiguration.

private Configuration getConfiguration() {
    Configuration configuration = new Configuration();
    configuration.set(KubernetesConfigOptions.CLUSTER_ID, CLUSTER_ID);
    configuration.set(HighAvailabilityOptions.HA_MODE, KubernetesHaServicesFactory.class.getCanonicalName());
    try {
        configuration.set(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    } catch (IOException e) {
        throw new FlinkRuntimeException("Failed to create HA storage", e);
    }
    return configuration;
}
Also used : MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) IOException(java.io.IOException)
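
Example 34 shows the other common construction: wrapping a checked IOException with a descriptive message so that a configuration helper stays free of checked exceptions. A minimal sketch of the same shape, assuming a throwaway directory is all that is needed; the helper name is illustrative and java.nio is used instead of the JUnit TemporaryFolder rule from the test.

import java.io.IOException;
import java.nio.file.Files;
import org.apache.flink.util.FlinkRuntimeException;

final class StoragePaths {

    private StoragePaths() {}

    /** Creates a temporary directory, rethrowing I/O problems as FlinkRuntimeException. */
    static String createTempStoragePath(String prefix) {
        try {
            return Files.createTempDirectory(prefix).toAbsolutePath().toString();
        } catch (IOException e) {
            throw new FlinkRuntimeException("Failed to create HA storage", e);
        }
    }
}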

Example 35 with FlinkRuntimeException

use of org.apache.flink.util.FlinkRuntimeException in project flink by apache.

the class KafkaPartitionSplitReader method parseStoppingOffsets.

private void parseStoppingOffsets(KafkaPartitionSplit split, List<TopicPartition> partitionsStoppingAtLatest, Set<TopicPartition> partitionsStoppingAtCommitted) {
    TopicPartition tp = split.getTopicPartition();
    split.getStoppingOffset().ifPresent(stoppingOffset -> {
        if (stoppingOffset >= 0) {
            stoppingOffsets.put(tp, stoppingOffset);
        } else if (stoppingOffset == KafkaPartitionSplit.LATEST_OFFSET) {
            partitionsStoppingAtLatest.add(tp);
        } else if (stoppingOffset == KafkaPartitionSplit.COMMITTED_OFFSET) {
            partitionsStoppingAtCommitted.add(tp);
        } else {
            // This should not happen.
            throw new FlinkRuntimeException(String.format("Invalid stopping offset %d for partition %s", stoppingOffset, tp));
        }
    });
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException)
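
Example 35 uses FlinkRuntimeException as a guard against a value that should be impossible: a negative stopping offset that is neither the LATEST_OFFSET nor the COMMITTED_OFFSET sentinel. Reduced to its core, the check looks roughly like the sketch below; the sentinel constants, class, and method names are illustrative stand-ins, not the KafkaPartitionSplit API.

import org.apache.flink.util.FlinkRuntimeException;

final class OffsetValidation {

    // Illustrative sentinel values, mirroring the role of
    // KafkaPartitionSplit.LATEST_OFFSET and KafkaPartitionSplit.COMMITTED_OFFSET.
    static final long LATEST_OFFSET = -1L;
    static final long COMMITTED_OFFSET = -2L;

    private OffsetValidation() {}

    /** Rejects offsets that are neither non-negative nor a known sentinel. */
    static long validateStoppingOffset(long stoppingOffset, String partition) {
        if (stoppingOffset >= 0
                || stoppingOffset == LATEST_OFFSET
                || stoppingOffset == COMMITTED_OFFSET) {
            return stoppingOffset;
        }
        // This should not happen for well-formed splits.
        throw new FlinkRuntimeException(
                String.format(
                        "Invalid stopping offset %d for partition %s", stoppingOffset, partition));
    }
}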

Aggregations

The number after each entry is its occurrence count:

FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException): 78
IOException (java.io.IOException): 28
Test (org.junit.Test): 13
JobID (org.apache.flink.api.common.JobID): 10
HashMap (java.util.HashMap): 8
ArrayList (java.util.ArrayList): 7
CompletableFuture (java.util.concurrent.CompletableFuture): 7
ExecutionException (java.util.concurrent.ExecutionException): 7
Nonnull (javax.annotation.Nonnull): 7
Configuration (org.apache.flink.configuration.Configuration): 6
Collectors (java.util.stream.Collectors): 5
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
JobResultStore (org.apache.flink.runtime.highavailability.JobResultStore): 4
RocksDBException (org.rocksdb.RocksDBException): 4
List (java.util.List): 3
Map (java.util.Map): 3
CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics): 3
TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot): 3
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 3
JobResult (org.apache.flink.runtime.jobmaster.JobResult): 3