Usage of org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory in the Apache Flink project.
Class DefaultSchedulerTest, method doTestCheckpointCleanerIsClosedAfterCheckpointServices.
/**
 * Verifies that the {@link CheckpointsCleaner} is not closed while the shutdown of the
 * checkpoint services (completed checkpoint store and checkpoint ID counter) is still blocked.
 *
 * <p>Visible for re-use in {@link
 * org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest}.
 *
 * @param schedulerFactory creates the scheduler under test from the instrumented checkpoint
 *     recovery factory and the instrumented cleaner
 * @param executorService executor on which the scheduler's {@code closeAsync()} is triggered
 * @throws Exception if waiting is interrupted or closing the scheduler fails
 */
public static void doTestCheckpointCleanerIsClosedAfterCheckpointServices(BiFunction<CheckpointRecoveryFactory, CheckpointsCleaner, SchedulerNG> schedulerFactory, ScheduledExecutorService executorService) throws Exception {
    final CountDownLatch checkpointServicesShutdownBlocked = new CountDownLatch(1);
    final CountDownLatch cleanerClosed = new CountDownLatch(1);

    // Both checkpoint services block in shutdown() until the test releases the latch.
    final CompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1) {
        @Override
        public void shutdown(JobStatus jobStatus, CheckpointsCleaner checkpointsCleaner) throws Exception {
            checkpointServicesShutdownBlocked.await();
            super.shutdown(jobStatus, checkpointsCleaner);
        }
    };
    final CheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter() {
        @Override
        public void shutdown(JobStatus jobStatus) throws Exception {
            checkpointServicesShutdownBlocked.await();
            super.shutdown(jobStatus);
        }
    };

    // The cleaner signals the moment its closeAsync() is invoked.
    final CheckpointsCleaner checkpointsCleaner = new CheckpointsCleaner() {
        @Override
        public synchronized CompletableFuture<Void> closeAsync() {
            cleanerClosed.countDown();
            return super.closeAsync();
        }
    };

    final SchedulerNG scheduler = schedulerFactory.apply(new TestingCheckpointRecoveryFactory(completedCheckpointStore, checkpointIDCounter), checkpointsCleaner);
    final CompletableFuture<Void> schedulerClosed = new CompletableFuture<>();
    final CountDownLatch schedulerClosing = new CountDownLatch(1);
    executorService.submit(() -> {
        try {
            // Forward success AND failure; forwarding only success would leave the test
            // blocked forever on schedulerClosed if closing fails.
            scheduler.closeAsync().whenComplete((ignored, failure) -> {
                if (failure != null) {
                    schedulerClosed.completeExceptionally(failure);
                } else {
                    schedulerClosed.complete(null);
                }
            });
        } catch (Throwable t) {
            // closeAsync() itself threw: report it through the future instead of losing it
            // in the discarded Future returned by submit().
            schedulerClosed.completeExceptionally(t);
        } finally {
            // Always release the test thread, even if triggering the close failed.
            schedulerClosing.countDown();
        }
    });

    // Wait for scheduler to start closing.
    schedulerClosing.await();
    assertFalse("CheckpointCleaner should not close before checkpoint services.", cleanerClosed.await(10, TimeUnit.MILLISECONDS));
    checkpointServicesShutdownBlocked.countDown();

    // Check the close result first so that a failed close fails the test right away rather
    // than leaving it stuck waiting for a cleaner that will never be closed.
    schedulerClosed.get();
    cleanerClosed.await();
}
Usage of org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory in the Apache Flink project.
Class AdaptiveSchedulerTest, method testExceptionHistoryWithTaskFailureFromStopWithSavepoint.
/**
 * Triggers stop-with-savepoint, then reports a task as FAILED, and expects the exception
 * history to contain exactly one root entry carrying the reported exception.
 */
@Test
public void testExceptionHistoryWithTaskFailureFromStopWithSavepoint() throws Exception {
    final Exception expectedException = new Exception("Expected Local Exception");

    // Checkpointing components handed to the scheduler builder.
    final TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(
                    new StandaloneCompletedCheckpointStore(1),
                    new StandaloneCheckpointIDCounter());
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();

    final Consumer<JobGraph> enableCheckpointing =
            graph ->
                    graph.setSnapshotSettings(
                            new JobCheckpointingSettings(
                                    CheckpointCoordinatorConfiguration.builder().build(), null));

    final Consumer<AdaptiveSchedulerBuilder> configureScheduler =
            schedulerBuilder ->
                    schedulerBuilder
                            .setCheckpointRecoveryFactory(recoveryFactory)
                            .setCheckpointCleaner(cleaner);

    final BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> failTaskDuringStopWithSavepoint =
            (adaptiveScheduler, executionAttempts) -> {
                // The second attempt in the list is the one reported as failed.
                final ExecutionAttemptID failingAttempt = executionAttempts.get(1);
                adaptiveScheduler.stopWithSavepoint(
                        "file:///tmp/target", true, SavepointFormatType.CANONICAL);
                final TaskExecutionState failedState =
                        new TaskExecutionState(
                                failingAttempt, ExecutionState.FAILED, expectedException);
                adaptiveScheduler.updateTaskExecutionState(
                        new TaskExecutionStateTransition(failedState));
            };

    final Iterable<RootExceptionHistoryEntry> actualExceptionHistory =
            runExceptionHistoryTests(
                    failTaskDuringStopWithSavepoint, configureScheduler, enableCheckpointing);

    assertThat(actualExceptionHistory).hasSize(1);
    final RootExceptionHistoryEntry rootEntry = actualExceptionHistory.iterator().next();
    assertThat(rootEntry.getException().deserializeError(classLoader))
            .isEqualTo(expectedException);
}
Usage of org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory in the Apache Flink project.
Class AdaptiveSchedulerTest, method testCloseShutsDownCheckpointingComponents.
/**
 * Starts a scheduler with checkpointing enabled, fails it globally, closes it, and expects
 * both the completed checkpoint store and the checkpoint ID counter to report a shutdown with
 * {@link JobStatus#FAILED}.
 */
@Test
public void testCloseShutsDownCheckpointingComponents() throws Exception {
    // Each future is handed to a testing component and later read for the shutdown status.
    final CompletableFuture<JobStatus> storeShutdownStatus = new CompletableFuture<>();
    final CompletableFuture<JobStatus> counterShutdownStatus = new CompletableFuture<>();

    final CompletedCheckpointStore store =
            TestingCompletedCheckpointStore
                    .createStoreWithShutdownCheckAndNoCompletedCheckpoints(storeShutdownStatus);
    final CheckpointIDCounter counter =
            TestingCheckpointIDCounter.createStoreWithShutdownCheckAndNoStartAction(
                    counterShutdownStatus);
    final TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(store, counter);

    final JobGraph jobGraph = createJobGraph();
    // checkpointing components are only created if checkpointing is enabled
    final JobCheckpointingSettings checkpointingSettings =
            new JobCheckpointingSettings(
                    CheckpointCoordinatorConfiguration.builder().build(), null);
    jobGraph.setSnapshotSettings(checkpointingSettings);

    final AdaptiveScheduler scheduler =
            new AdaptiveSchedulerBuilder(jobGraph, singleThreadMainThreadExecutor)
                    .setCheckpointRecoveryFactory(recoveryFactory)
                    .build();

    singleThreadMainThreadExecutor.execute(
            () -> {
                scheduler.startScheduling();
                // transition into the FAILED state
                scheduler.handleGlobalFailure(new FlinkException("Test exception"));
                scheduler.closeAsync();
            });

    final JobStatus storeStatus = storeShutdownStatus.get();
    assertThat(storeStatus).isEqualTo(JobStatus.FAILED);
    final JobStatus counterStatus = counterShutdownStatus.get();
    assertThat(counterStatus).isEqualTo(JobStatus.FAILED);
}
Usage of org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory in the Apache Flink project.
Class AdaptiveSchedulerTest, method runExceptionHistoryTests.
/**
 * Shared driver for the exception-history tests.
 *
 * <p>Builds an {@link AdaptiveScheduler} for a fresh job graph, starts it and offers four
 * slots, waits on the listener's {@code waitForRunning()}, runs {@code testLogic} on the main
 * thread executor, reports every attempt the task manager gateway was asked to cancel as
 * CANCELED, waits on the listener's {@code waitForTerminal()}, and finally returns the
 * exception history of the job.
 *
 * @param testLogic test-specific actions, given the scheduler and the current execution attempt
 *     IDs of all vertices
 * @param setupScheduler hook for additional builder configuration, applied after the defaults
 * @param setupJobGraph hook for adjusting the job graph before the scheduler is built
 * @return the exception history taken from {@code scheduler.requestJob()}
 * @throws Exception if any of the awaited steps fails or is interrupted
 */
private Iterable<RootExceptionHistoryEntry> runExceptionHistoryTests(BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> testLogic, Consumer<AdaptiveSchedulerBuilder> setupScheduler, Consumer<JobGraph> setupJobGraph) throws Exception {
    final int slotCount = 4;

    final JobGraph jobGraph = createJobGraph();
    setupJobGraph.accept(jobGraph);

    final RunFailedJobListener jobStatusListener = new RunFailedJobListener();
    // Filled by the gateway's cancel consumer with every attempt it is asked to cancel.
    final List<ExecutionAttemptID> cancelRequests = new ArrayList<>();

    final CompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);
    final CheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    final TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(store, idCounter);

    final DefaultDeclarativeSlotPool slotPool = createDeclarativeSlotPool(jobGraph.getJobID());

    final Configuration jobMasterConfig = new Configuration();
    jobMasterConfig.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofMillis(1L));

    final AdaptiveSchedulerBuilder schedulerBuilder =
            new AdaptiveSchedulerBuilder(jobGraph, singleThreadMainThreadExecutor)
                    .setJobMasterConfiguration(jobMasterConfig)
                    .setDeclarativeSlotPool(slotPool)
                    .setCheckpointRecoveryFactory(recoveryFactory)
                    .setCheckpointCleaner(cleaner)
                    .setJobStatusListener(jobStatusListener);
    // Test-specific configuration is applied last, on top of the defaults above.
    setupScheduler.accept(schedulerBuilder);
    final AdaptiveScheduler scheduler = schedulerBuilder.build();

    final SubmissionBufferingTaskManagerGateway gateway =
            new SubmissionBufferingTaskManagerGateway(slotCount);
    gateway.setCancelConsumer(cancelRequests::add);

    singleThreadMainThreadExecutor.execute(
            () -> {
                scheduler.startScheduling();
                offerSlots(
                        slotPool,
                        createSlotOffersForResourceRequirements(
                                ResourceCounter.withResource(ResourceProfile.UNKNOWN, slotCount)),
                        gateway);
            });

    jobStatusListener.waitForRunning();

    // Read the execution vertices on the main thread executor and hand them back via a future.
    final CompletableFuture<Iterable<ArchivedExecutionVertex>> verticesFuture =
            new CompletableFuture<>();
    singleThreadMainThreadExecutor.execute(
            () ->
                    verticesFuture.complete(
                            scheduler
                                    .requestJob()
                                    .getArchivedExecutionGraph()
                                    .getAllExecutionVertices()));
    final Iterable<ArchivedExecutionVertex> vertices = verticesFuture.get();

    final List<ExecutionAttemptID> attemptIds =
            IterableUtils.toStream(vertices)
                    .map(ArchivedExecutionVertex::getCurrentExecutionAttempt)
                    .map(ArchivedExecution::getAttemptId)
                    .collect(Collectors.toList());

    CompletableFuture.runAsync(
                    () -> testLogic.accept(scheduler, attemptIds),
                    singleThreadMainThreadExecutor)
            .get();

    // Report each attempt for which cancellation was requested as CANCELED.
    CompletableFuture.runAsync(
                    () ->
                            cancelRequests.forEach(
                                    cancelledAttempt ->
                                            scheduler.updateTaskExecutionState(
                                                    new TaskExecutionStateTransition(
                                                            new TaskExecutionState(
                                                                    cancelledAttempt,
                                                                    ExecutionState.CANCELED,
                                                                    null)))),
                    singleThreadMainThreadExecutor)
            .get();

    jobStatusListener.waitForTerminal();

    return scheduler.requestJob().getExceptionHistory();
}
Aggregations