Use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
From the class JobMasterTest, method testRestoringFromSavepoint:
/**
 * Tests that a JobMaster will restore the given JobGraph from its savepoint upon initial
 * submission.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {
    // create savepoint data
    final long savepointId = 42L;
    final File savepointFile = createSavepoint(savepointId);

    // set savepoint settings
    final SavepointRestoreSettings savepointRestoreSettings =
            SavepointRestoreSettings.forPath(savepointFile.getAbsolutePath(), true);
    final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings);

    final StandaloneCompletedCheckpointStore completedCheckpointStore =
            new StandaloneCompletedCheckpointStore(1);
    final CheckpointRecoveryFactory testingCheckpointRecoveryFactory =
            PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(
                    maxCheckpoints -> completedCheckpointStore);
    haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory);

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();

    try {
        // we need to start and register the required slots to let the adaptive scheduler
        // restore from the savepoint
        jobMaster.start();

        final OneShotLatch taskSubmitLatch = new OneShotLatch();
        registerSlotsAtJobMaster(
                1,
                jobMaster.getSelfGateway(JobMasterGateway.class),
                jobGraph.getJobID(),
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer(
                                (taskDeploymentDescriptor, jobMasterId) -> {
                                    taskSubmitLatch.trigger();
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                })
                        .createTestingTaskExecutorGateway(),
                new LocalUnresolvedTaskManagerLocation());

        // wait until a task has been submitted, because this guarantees that the
        // ExecutionGraph has been created
        taskSubmitLatch.await();

        final CompletedCheckpoint savepointCheckpoint =
                completedCheckpointStore.getLatestCheckpoint();
        assertThat(savepointCheckpoint, Matchers.notNullValue());
        assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
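For context, the restore wiring in this test boils down to attaching SavepointRestoreSettings to the JobGraph before submission; the second argument to forPath is the allowNonRestoredState flag (true above). Below is a minimal, self-contained sketch of that step; the savepoint path and the class name SavepointRestoreExample are hypothetical, not taken from the test.

import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;

public class SavepointRestoreExample {
    public static void main(String[] args) {
        // Hypothetical savepoint location; the test above creates one via createSavepoint(42L).
        final String savepointPath = "/tmp/savepoint-42";

        // allowNonRestoredState = true lets the restore proceed even if the savepoint
        // contains state that no longer maps to any operator in the job graph.
        final SavepointRestoreSettings settings =
                SavepointRestoreSettings.forPath(savepointPath, true);

        // Attach the restore settings before submission; the JobMaster reads them on
        // initial submission, which is exactly what the test above verifies.
        final JobGraph jobGraph = new JobGraph("savepoint-restore-example");
        jobGraph.setSavepointRestoreSettings(settings);

        System.out.println(jobGraph.getSavepointRestoreSettings());
    }
}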
Use of org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore in project flink by apache.
From the class AdaptiveSchedulerTest, method runExceptionHistoryTests:
private Iterable<RootExceptionHistoryEntry> runExceptionHistoryTests(
        BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> testLogic,
        Consumer<AdaptiveSchedulerBuilder> setupScheduler,
        Consumer<JobGraph> setupJobGraph) throws Exception {
    final int numAvailableSlots = 4;
    final JobGraph jobGraph = createJobGraph();
    setupJobGraph.accept(jobGraph);
    RunFailedJobListener listener = new RunFailedJobListener();
    List<ExecutionAttemptID> cancelledTasks = new ArrayList<>();

    // checkpoint services backing the scheduler under test
    final CompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1);
    final CheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter();
    final CheckpointsCleaner checkpointCleaner = new CheckpointsCleaner();
    TestingCheckpointRecoveryFactory checkpointRecoveryFactory =
            new TestingCheckpointRecoveryFactory(completedCheckpointStore, checkpointIDCounter);

    final DefaultDeclarativeSlotPool declarativeSlotPool = createDeclarativeSlotPool(jobGraph.getJobID());
    final Configuration configuration = new Configuration();
    configuration.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofMillis(1L));

    AdaptiveSchedulerBuilder builder =
            new AdaptiveSchedulerBuilder(jobGraph, singleThreadMainThreadExecutor)
                    .setJobMasterConfiguration(configuration)
                    .setDeclarativeSlotPool(declarativeSlotPool)
                    .setCheckpointRecoveryFactory(checkpointRecoveryFactory)
                    .setCheckpointCleaner(checkpointCleaner)
                    .setJobStatusListener(listener);
    setupScheduler.accept(builder);
    final AdaptiveScheduler scheduler = builder.build();

    final SubmissionBufferingTaskManagerGateway taskManagerGateway =
            new SubmissionBufferingTaskManagerGateway(numAvailableSlots);
    taskManagerGateway.setCancelConsumer(cancelledTasks::add);

    // start scheduling and offer the slots on the scheduler's main thread
    singleThreadMainThreadExecutor.execute(() -> {
        scheduler.startScheduling();
        offerSlots(
                declarativeSlotPool,
                createSlotOffersForResourceRequirements(
                        ResourceCounter.withResource(ResourceProfile.UNKNOWN, numAvailableSlots)),
                taskManagerGateway);
    });
    listener.waitForRunning();

    // collect the attempt IDs of the running executions for the test logic
    CompletableFuture<Iterable<ArchivedExecutionVertex>> vertexFuture = new CompletableFuture<>();
    singleThreadMainThreadExecutor.execute(() -> vertexFuture.complete(
            scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices()));
    final Iterable<ArchivedExecutionVertex> executionVertices = vertexFuture.get();
    final List<ExecutionAttemptID> attemptIds =
            IterableUtils.toStream(executionVertices)
                    .map(ArchivedExecutionVertex::getCurrentExecutionAttempt)
                    .map(ArchivedExecution::getAttemptId)
                    .collect(Collectors.toList());

    CompletableFuture<Void> runTestLogicFuture = CompletableFuture.runAsync(
            () -> testLogic.accept(scheduler, attemptIds), singleThreadMainThreadExecutor);
    runTestLogicFuture.get();

    // acknowledge the requested cancellations so the job can reach a terminal state
    Consumer<ExecutionAttemptID> canceller =
            attemptId -> scheduler.updateTaskExecutionState(
                    new TaskExecutionStateTransition(
                            new TaskExecutionState(attemptId, ExecutionState.CANCELED, null)));
    CompletableFuture<Void> cancelFuture = CompletableFuture.runAsync(
            () -> cancelledTasks.forEach(canceller), singleThreadMainThreadExecutor);
    cancelFuture.get();

    listener.waitForTerminal();
    return scheduler.requestJob().getExceptionHistory();
}
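One detail worth calling out: every interaction with the AdaptiveScheduler above is funneled through singleThreadMainThreadExecutor, since the scheduler expects state-changing calls on its main thread. The hand-off-and-wait pattern itself is plain JDK; here is a minimal sketch with illustrative names (MainThreadHandOffExample is not from the test):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainThreadHandOffExample {
    public static void main(String[] args) {
        // Stand-in for the scheduler's single-threaded main-thread executor.
        final ExecutorService mainThreadExecutor = Executors.newSingleThreadExecutor();

        // Run the "test logic" on the component's thread and block the caller until it
        // finishes, mirroring runTestLogicFuture.get() in the method above.
        final CompletableFuture<Void> testLogicFuture = CompletableFuture.runAsync(
                () -> System.out.println("state-changing call on the main thread"),
                mainThreadExecutor);
        testLogicFuture.join();

        mainThreadExecutor.shutdown();
    }
}

Blocking with get() or join() after each hand-off gives the test deterministic ordering without touching the scheduler's state from a foreign thread.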