use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
the class DefaultSchedulerTest method doTestCheckpointCleanerIsClosedAfterCheckpointServices.
/**
* Shared test body verifying that the {@link CheckpointsCleaner} handed to a scheduler is not
* closed while the shutdown of the checkpoint services (completed checkpoint store and
* checkpoint ID counter) is still blocked.
*
* <p>Visible for re-use in {@link
* org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest}.
*
* @param schedulerFactory creates the scheduler under test from the instrumented checkpoint
*     recovery factory and the instrumented checkpoints cleaner
* @param executorService executor on which {@code scheduler.closeAsync()} is invoked, so that
*     the calling thread stays free to observe and release the latches
* @throws Exception if waiting on a latch or on the scheduler's close future fails
*/
public static void doTestCheckpointCleanerIsClosedAfterCheckpointServices(BiFunction<CheckpointRecoveryFactory, CheckpointsCleaner, SchedulerNG> schedulerFactory, ScheduledExecutorService executorService) throws Exception {
// Released by the test thread to let the checkpoint services finish shutting down.
final CountDownLatch checkpointServicesShutdownBlocked = new CountDownLatch(1);
// Counted down as soon as closeAsync() is called on the cleaner.
final CountDownLatch cleanerClosed = new CountDownLatch(1);
// Store whose shutdown blocks until the test releases the latch above.
final CompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1) {
@Override
public void shutdown(JobStatus jobStatus, CheckpointsCleaner checkpointsCleaner) throws Exception {
checkpointServicesShutdownBlocked.await();
super.shutdown(jobStatus, checkpointsCleaner);
}
};
// ID counter whose shutdown blocks on the same latch as the store.
final CheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter() {
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
checkpointServicesShutdownBlocked.await();
super.shutdown(jobStatus);
}
};
// Cleaner that records the moment its closeAsync() is invoked.
final CheckpointsCleaner checkpointsCleaner = new CheckpointsCleaner() {
@Override
public synchronized CompletableFuture<Void> closeAsync() {
cleanerClosed.countDown();
return super.closeAsync();
}
};
final SchedulerNG scheduler = schedulerFactory.apply(new TestingCheckpointRecoveryFactory(completedCheckpointStore, checkpointIDCounter), checkpointsCleaner);
// Completed once the future returned by scheduler.closeAsync() completes.
final CompletableFuture<Void> schedulerClosed = new CompletableFuture<>();
// Counted down after the call to scheduler.closeAsync() has returned on the executor thread.
final CountDownLatch schedulerClosing = new CountDownLatch(1);
// Close on the executor so this thread is not the one running the closeAsync() call.
executorService.submit(() -> {
scheduler.closeAsync().thenRun(() -> schedulerClosed.complete(null));
schedulerClosing.countDown();
});
// Wait for scheduler to start closing.
schedulerClosing.await();
// While the checkpoint services are still blocked, the cleaner must not have been closed;
// the 10 ms await is expected to time out and return false.
assertFalse("CheckpointCleaner should not close before checkpoint services.", cleanerClosed.await(10, TimeUnit.MILLISECONDS));
// Unblock the store/counter shutdown, then wait for the cleaner and the scheduler to finish closing.
checkpointServicesShutdownBlocked.countDown();
cleanerClosed.await();
schedulerClosed.get();
}
use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
the class AdaptiveSchedulerTest method testExceptionHistoryWithTaskFailureFromStopWithSavepoint.
/**
 * Fails a task while a stop-with-savepoint operation has been triggered and checks that the
 * exception history contains exactly one root entry carrying the injected exception.
 */
@Test
public void testExceptionHistoryWithTaskFailureFromStopWithSavepoint() throws Exception {
    final Exception injectedFailure = new Exception("Expected Local Exception");

    // Attach snapshot settings (default coordinator configuration, no second argument) to the job graph.
    final Consumer<JobGraph> jobGraphSetup =
            graph -> {
                graph.setSnapshotSettings(
                        new JobCheckpointingSettings(
                                CheckpointCoordinatorConfiguration.builder().build(), null));
            };

    // Standalone checkpoint services wired into the scheduler under test.
    final TestingCheckpointRecoveryFactory recoveryFactory =
            new TestingCheckpointRecoveryFactory(
                    new StandaloneCompletedCheckpointStore(1), new StandaloneCheckpointIDCounter());
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    final Consumer<AdaptiveSchedulerBuilder> schedulerSetup =
            schedulerBuilder -> {
                schedulerBuilder
                        .setCheckpointRecoveryFactory(recoveryFactory)
                        .setCheckpointCleaner(cleaner);
            };

    // Trigger stop-with-savepoint, then report the second execution attempt as FAILED.
    final BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> scenario =
            (adaptiveScheduler, executionAttempts) -> {
                final ExecutionAttemptID failingAttempt = executionAttempts.get(1);
                adaptiveScheduler.stopWithSavepoint(
                        "file:///tmp/target", true, SavepointFormatType.CANONICAL);
                final TaskExecutionState failedState =
                        new TaskExecutionState(
                                failingAttempt, ExecutionState.FAILED, injectedFailure);
                adaptiveScheduler.updateTaskExecutionState(
                        new TaskExecutionStateTransition(failedState));
            };

    final Iterable<RootExceptionHistoryEntry> history =
            runExceptionHistoryTests(scenario, schedulerSetup, jobGraphSetup);

    assertThat(history).hasSize(1);
    final RootExceptionHistoryEntry onlyEntry = history.iterator().next();
    assertThat(onlyEntry.getException().deserializeError(classLoader)).isEqualTo(injectedFailure);
}
use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
the class DefaultExecutionGraphFactoryTest method testRestoringModifiedJobFromSavepointFails.
/**
 * Restoring a job graph created with {@code createJobGraphWithSavepoint(false, 42L)} is
 * expected to fail with a rollback error message.
 */
@Test
public void testRestoringModifiedJobFromSavepointFails() throws Exception {
    final JobGraph modifiedJobGraph = createJobGraphWithSavepoint(false, 42L);
    final ExecutionGraphFactory graphFactory = createExecutionGraphFactory();

    try {
        // All arguments are built inside the try block so that any exception they raise
        // is checked against the expected message as well.
        graphFactory.createAndRestoreExecutionGraph(
                modifiedJobGraph,
                new StandaloneCompletedCheckpointStore(1),
                new CheckpointsCleaner(),
                new StandaloneCheckpointIDCounter(),
                TaskDeploymentDescriptorFactory.PartitionLocationConstraint.CAN_BE_UNKNOWN,
                0L,
                new DefaultVertexAttemptNumberStore(),
                SchedulerBase.computeVertexParallelismStore(modifiedJobGraph),
                (execution, previousState, newState) -> {
                    // state transitions are irrelevant for this test
                },
                log);
        fail("Expected ExecutionGraph creation to fail because of non restored state.");
    } catch (Exception restoreFailure) {
        assertThat(
                restoreFailure,
                FlinkMatchers.containsMessage("Failed to rollback to checkpoint/savepoint"));
    }
}
use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
the class DefaultExecutionGraphFactoryTest method testRestoringModifiedJobFromSavepointWithAllowNonRestoredStateSucceeds.
/**
 * Restoring a job graph created with {@code createJobGraphWithSavepoint(true, savepointId)}
 * must succeed and leave the savepoint as the latest entry of the completed checkpoint store.
 */
@Test
public void testRestoringModifiedJobFromSavepointWithAllowNonRestoredStateSucceeds() throws Exception {
    // id of the savepoint the job graph is configured to restore from
    final long savepointId = 42L;
    final JobGraph modifiedJobGraph = createJobGraphWithSavepoint(true, savepointId);
    final ExecutionGraphFactory graphFactory = createExecutionGraphFactory();

    // Collaborators are created in the same order in which they are passed to the factory.
    final StandaloneCompletedCheckpointStore checkpointStore =
            new StandaloneCompletedCheckpointStore(1);
    final CheckpointsCleaner cleaner = new CheckpointsCleaner();
    final StandaloneCheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
    final DefaultVertexAttemptNumberStore attemptNumberStore =
            new DefaultVertexAttemptNumberStore();

    graphFactory.createAndRestoreExecutionGraph(
            modifiedJobGraph,
            checkpointStore,
            cleaner,
            idCounter,
            TaskDeploymentDescriptorFactory.PartitionLocationConstraint.CAN_BE_UNKNOWN,
            0L,
            attemptNumberStore,
            SchedulerBase.computeVertexParallelismStore(modifiedJobGraph),
            (execution, previousState, newState) -> {
                // state transitions are irrelevant for this test
            },
            log);

    // The restore must have registered the savepoint in the store.
    final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();
    MatcherAssert.assertThat(restored, notNullValue());
    MatcherAssert.assertThat(restored.getCheckpointID(), Matchers.is(savepointId));
}
use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
the class JobMasterTest method testCheckpointPrecedesSavepointRecovery.
/**
 * Tests that an existing checkpoint takes precedence over a savepoint.
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {
    // Write a savepoint and point the job's restore settings at it.
    final long savepointId = 42L;
    final File savepointFile = createSavepoint(savepointId);
    final String savepointPath = "" + savepointFile.getAbsolutePath();
    final SavepointRestoreSettings restoreSettings =
            SavepointRestoreSettings.forPath(savepointPath, true);
    final JobGraph jobGraph = createJobGraphWithCheckpointing(restoreSettings);

    // Pre-populate the completed checkpoint store with a checkpoint for the same job.
    final long checkpointId = 1L;
    final CheckpointProperties checkpointProperties =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    final CompletedCheckpoint existingCheckpoint =
            new CompletedCheckpoint(
                    jobGraph.getJobID(),
                    checkpointId,
                    1L,
                    1L,
                    Collections.emptyMap(),
                    null,
                    checkpointProperties,
                    new DummyCheckpointStorageLocation());
    final StandaloneCompletedCheckpointStore checkpointStore =
            new StandaloneCompletedCheckpointStore(1);
    checkpointStore.addCheckpointAndSubsumeOldestOne(
            existingCheckpoint,
            new CheckpointsCleaner(),
            () -> {
                // no post-cleanup action needed
            });

    // Expose the pre-populated store through the HA services.
    final CheckpointRecoveryFactory recoveryFactory =
            PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(
                    maxCheckpoints -> checkpointStore);
    haServices.setCheckpointRecoveryFactory(recoveryFactory);

    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).createJobMaster();
    try {
        // After the JobMaster has been created, the latest entry in the store is expected
        // to still be the pre-existing checkpoint rather than the savepoint.
        final CompletedCheckpoint latest = checkpointStore.getLatestCheckpoint();
        assertThat(latest, Matchers.notNullValue());
        assertThat(latest.getCheckpointID(), is(checkpointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Aggregations