Example use of org.apache.flink.util.FlinkRuntimeException in the Apache Flink project.
Class StreamTaskFinalCheckpointsTest, method doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask.
/**
 * Runs a {@code SourceStreamTask} over an {@code ImmediatelyFinishingSource}, triggers checkpoint
 * {@code 6} followed by a stop-with-savepoint {@code 7}, and asserts that both trigger futures are
 * done and that the savepoint is the checkpoint id that was both reported and notified complete.
 *
 * <p>The checkpoint responder used here forwards nothing until the savepoint is acknowledged. At
 * that point it first forwards an acknowledgement for the final checkpoint, sleeps for
 * {@code CONCURRENT_EVENT_WAIT_PERIOD_MS}, and only then forwards the savepoint acknowledgement.
 *
 * @param drain if {@code true} the savepoint is triggered via
 *     {@code triggerStopWithSavepointDrain}, otherwise via {@code triggerStopWithSavepointNoDrain}
 * @throws Exception if the harness or any of the triggered operations fail
 */
private void doTestTriggerStopWithSavepointWhenWaitingForFinalCheckpointOnSourceTask(boolean drain) throws Exception {
    int finalCheckpointId = 6;
    int syncSavepointId = 7;
    CompletingCheckpointResponder checkpointResponder = new CompletingCheckpointResponder() {
        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            // Acknowledgements for any id other than the savepoint are intentionally not
            // forwarded here; the final checkpoint is acknowledged below instead.
            if (syncSavepointId == checkpointId) {
                // Complete the final checkpoint only once the sync savepoint is acknowledged,
                // so the task has to wait for the sync savepoint to complete.
                super.acknowledgeCheckpoint(jobID, executionAttemptID, finalCheckpointId, checkpointMetrics, subtaskState);
                try {
                    // Give some potential time for the task to finish before the
                    // savepoint is notified complete
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    // Restore the interrupt status so code further up the stack can still
                    // observe the interruption after it has been wrapped.
                    Thread.currentThread().interrupt();
                    throw new FlinkRuntimeException(e);
                }
                super.acknowledgeCheckpoint(jobID, executionAttemptID, syncSavepointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO).modifyStreamConfig(config -> {
        config.setCheckpointingEnabled(true);
    }).setCheckpointResponder(checkpointResponder).setupOutputForSingletonOperatorChain(new StreamSource<>(new ImmediatelyFinishingSource())).build()) {
        // Route completion/abort notifications from the responder back into the task.
        checkpointResponder.setHandlers(testHarness.streamTask::notifyCheckpointCompleteAsync, testHarness.streamTask::notifyCheckpointAbortAsync);
        // Run the task's mailbox loop before triggering anything; the source is an
        // ImmediatelyFinishingSource.
        testHarness.streamTask.runMailboxLoop();
        // trigger the final checkpoint
        CompletableFuture<Boolean> checkpointFuture = triggerCheckpoint(testHarness, finalCheckpointId);
        // trigger the synchronous savepoint
        CompletableFuture<Boolean> savepointFuture = drain ? triggerStopWithSavepointDrain(testHarness, syncSavepointId) : triggerStopWithSavepointNoDrain(testHarness, syncSavepointId);
        // The checkpoint 6 would be triggered successfully.
        testHarness.finishProcessing();
        assertTrue(checkpointFuture.isDone());
        assertTrue(savepointFuture.isDone());
        // Block until the task state manager has received a report before inspecting it.
        testHarness.getTaskStateManager().getWaitForReportLatch().await();
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getReportedCheckpointId());
        assertEquals(syncSavepointId, testHarness.getTaskStateManager().getNotifiedCompletedCheckpointId());
    }
}
Example use of org.apache.flink.util.FlinkRuntimeException in the Apache Flink project.
Class StreamTaskFinalCheckpointsTest, method testWaitingForPendingCheckpointsOnFinished.
/**
 * Triggers checkpoints {@code 1} and {@code 2} on a task that has already consumed all of its
 * data and asserts that, after processing is finished, checkpoint {@code 2} is the one reported
 * to the task state manager.
 *
 * <p>The responder delays handling of checkpoint {@code 2} by
 * {@code CONCURRENT_EVENT_WAIT_PERIOD_MS} and never forwards its acknowledgement; every other
 * checkpoint is acknowledged normally.
 *
 * @throws Exception if the harness or any of the triggered operations fail
 */
@Test
public void testWaitingForPendingCheckpointsOnFinished() throws Exception {
    long delayedCheckpointId = 2;
    CompletingCheckpointResponder responder = new CompletingCheckpointResponder() {
        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            if (delayedCheckpointId == checkpointId) {
                try {
                    // Give some potential time for the task to finish before the
                    // checkpoint is acknowledged, also do not notify its completion
                    Thread.sleep(CONCURRENT_EVENT_WAIT_PERIOD_MS);
                } catch (InterruptedException e) {
                    // Restore the interrupt status so code further up the stack can still
                    // observe the interruption after it has been wrapped.
                    Thread.currentThread().interrupt();
                    throw new FlinkRuntimeException(e);
                }
            } else {
                super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            }
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = createTestHarness(responder)) {
        // finish all data
        harness.waitForTaskCompletion();
        // trigger the final checkpoint
        harness.streamTask.triggerCheckpointOnBarrier(new CheckpointMetaData(1, 101), CheckpointOptions.forCheckpointWithDefaultLocation(), new CheckpointMetricsBuilder().setBytesProcessedDuringAlignment(0L).setAlignmentDurationNanos(0L));
        // trigger another checkpoint that we want to complete before finishing the task
        harness.streamTask.triggerCheckpointOnBarrier(new CheckpointMetaData(delayedCheckpointId, 101), CheckpointOptions.forCheckpointWithDefaultLocation(), new CheckpointMetricsBuilder().setBytesProcessedDuringAlignment(0L).setAlignmentDurationNanos(0L));
        harness.processAll();
        harness.finishProcessing();
        // The delayed checkpoint must still have been reported even though its
        // acknowledgement was held back by the responder.
        assertEquals(delayedCheckpointId, harness.getTaskStateManager().getReportedCheckpointId());
    }
}
Example use of org.apache.flink.util.FlinkRuntimeException in the Apache Flink project.
Class CheckpointFailureManagerITCase, method testAsyncCheckpointFailureTriggerJobFailed.
/**
 * Submits a job that uses {@code AsyncFailureStateBackend} with restarts disabled and verifies
 * that it fails with a {@code FlinkRuntimeException} carrying
 * {@code CheckpointFailureManager.EXCEEDED_CHECKPOINT_TOLERABLE_FAILURE_MESSAGE}, and that the
 * source was initialized exactly once.
 *
 * @throws Exception if job submission fails in an unexpected way
 */
@Test(timeout = 20_000)
public void testAsyncCheckpointFailureTriggerJobFailed() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    // No restarts: a single failure must terminate the job.
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new AsyncFailureStateBackend());
    env.addSource(new StringGeneratingSourceFunction()).addSink(new DiscardingSink<>());
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    try {
        TestUtils.submitJobAndWaitForResult(cluster.getClusterClient(), jobGraph, getClass().getClassLoader());
        // Reaching this line means the job finished without failing, which would otherwise
        // let the test pass without ever checking the failure cause.
        Assert.fail("The job should have failed because of the checkpoint failure");
    } catch (JobExecutionException jobException) {
        // The failure must be caused by exceeding the tolerable checkpoint failures.
        Optional<FlinkRuntimeException> throwable = ExceptionUtils.findThrowable(jobException, FlinkRuntimeException.class);
        Assert.assertTrue(throwable.isPresent());
        Assert.assertEquals(CheckpointFailureManager.EXCEEDED_CHECKPOINT_TOLERABLE_FAILURE_MESSAGE, throwable.get().getMessage());
    }
    // assert that the job only failed once.
    Assert.assertEquals(1, StringGeneratingSourceFunction.INITIALIZE_TIMES.get());
}
Example use of org.apache.flink.util.FlinkRuntimeException in the Apache Flink project.
Class KubernetesHighAvailabilityRecoverFromSavepointITCase, method getConfiguration.
/**
 * Builds the configuration for this test: the cluster id, the Kubernetes HA services factory as
 * HA mode, and a freshly created temporary folder as HA storage path.
 *
 * @return the populated configuration
 * @throws FlinkRuntimeException if the temporary HA storage folder cannot be created
 */
private Configuration getConfiguration() {
    final Configuration haConfig = new Configuration();
    haConfig.set(KubernetesConfigOptions.CLUSTER_ID, CLUSTER_ID);

    final String haServicesFactoryName = KubernetesHaServicesFactory.class.getCanonicalName();
    haConfig.set(HighAvailabilityOptions.HA_MODE, haServicesFactoryName);

    try {
        // A new folder is created per call, so each configuration gets its own storage path.
        final String haStoragePath = temporaryFolder.newFolder().getAbsolutePath();
        haConfig.set(HighAvailabilityOptions.HA_STORAGE_PATH, haStoragePath);
    } catch (IOException e) {
        throw new FlinkRuntimeException("Failed to create HA storage", e);
    }

    return haConfig;
}
Example use of org.apache.flink.util.FlinkRuntimeException in the Apache Flink project.
Class KafkaPartitionSplitReader, method parseStoppingOffsets.
/**
 * Classifies the stopping offset of a split, if it has one.
 *
 * <p>A non-negative offset is stored in {@code stoppingOffsets} for the split's partition. The
 * markers {@code KafkaPartitionSplit.LATEST_OFFSET} and
 * {@code KafkaPartitionSplit.COMMITTED_OFFSET} add the partition to the corresponding collection
 * instead. A split without a stopping offset leaves everything untouched.
 *
 * @param split the split whose stopping offset is inspected
 * @param partitionsStoppingAtLatest receives the partition when the offset is the "latest" marker
 * @param partitionsStoppingAtCommitted receives the partition when the offset is the "committed"
 *     marker
 * @throws FlinkRuntimeException if the offset is negative but matches neither marker
 */
private void parseStoppingOffsets(KafkaPartitionSplit split, List<TopicPartition> partitionsStoppingAtLatest, Set<TopicPartition> partitionsStoppingAtCommitted) {
    TopicPartition partition = split.getTopicPartition();
    split.getStoppingOffset().ifPresent(offset -> {
        // Concrete offset: remember it for this partition.
        if (offset >= 0) {
            stoppingOffsets.put(partition, offset);
            return;
        }
        // Negative values are only valid as one of the two symbolic markers.
        if (offset == KafkaPartitionSplit.LATEST_OFFSET) {
            partitionsStoppingAtLatest.add(partition);
            return;
        }
        if (offset == KafkaPartitionSplit.COMMITTED_OFFSET) {
            partitionsStoppingAtCommitted.add(partition);
            return;
        }
        // This should not happen.
        throw new FlinkRuntimeException(String.format("Invalid stopping offset %d for partition %s", offset, partition));
    });
}
Aggregations