Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testCheckpointPrecedesSavepointRecovery.
/**
 * Tests that an existing checkpoint takes precedence over a savepoint.
 *
 * <p>The job graph is configured to restore from a savepoint with id 42, while the completed
 * checkpoint store exposed through the HA services already holds a checkpoint with id 1. The
 * test passes if the store's latest checkpoint is still the pre-existing checkpoint after the
 * JobMaster has been created.
 *
 * @throws Exception if the savepoint, the checkpoint store or the JobMaster cannot be set up
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {
    // create savepoint data
    final long savepointId = 42L;
    final File savepointFile = createSavepoint(savepointId);

    // set savepoint settings
    final SavepointRestoreSettings savepointRestoreSettings =
            SavepointRestoreSettings.forPath(savepointFile.getAbsolutePath(), true);
    final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings);

    // pre-populate the checkpoint store with a checkpoint whose id differs from the savepoint id
    final long checkpointId = 1L;
    final CompletedCheckpoint completedCheckpoint =
            new CompletedCheckpoint(
                    jobGraph.getJobID(),
                    checkpointId,
                    1L,
                    1L,
                    Collections.emptyMap(),
                    null,
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new DummyCheckpointStorageLocation());

    final StandaloneCompletedCheckpointStore completedCheckpointStore =
            new StandaloneCompletedCheckpointStore(1);
    // the trailing lambda is a no-op callback
    completedCheckpointStore.addCheckpointAndSubsumeOldestOne(
            completedCheckpoint, new CheckpointsCleaner(), () -> {});

    // every store requested from this factory is the pre-populated store above
    final CheckpointRecoveryFactory testingCheckpointRecoveryFactory =
            PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(
                    maxCheckpoints -> completedCheckpointStore);
    haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory);

    // Hand the configured HA services to the JobMaster; without this the recovery factory set
    // above is not passed to the builder and the assertions below would only re-read the
    // checkpoint this test inserted itself.
    // NOTE(review): the JobMaster is created but never started here - confirm that state
    // restore already happens at creation time for the scheduler under test.
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();

    try {
        // the pre-existing checkpoint, not the savepoint, must be the latest entry in the store
        final CompletedCheckpoint savepointCheckpoint =
                completedCheckpointStore.getLatestCheckpoint();

        assertThat(savepointCheckpoint, Matchers.notNullValue());
        assertThat(savepointCheckpoint.getCheckpointID(), is(checkpointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testAllocatedSlotReportDoesNotContainStaleInformation.
/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not stale
 * information about the allocated slots on the {@link JobMaster}.
 *
 * <p>Every heartbeat received by the testing task executor is checked: before the slot pool
 * has signalled that it received slot offers the report must be empty, afterwards it must
 * contain exactly one slot. The first violated assertion fails the test.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 *
 * @throws Exception if the JobMaster cannot be started, registration or the slot offer fails,
 *     or a heartbeat carried an unexpected slot report
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    // completed exceptionally by the first failed heartbeat assertion, or normally by the
    // first heartbeat observed after terminateHeartbeatVerification has been set
    final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation =
            new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
    final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();

    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setHeartbeatJobManagerFunction(
                            (taskManagerId, allocatedSlotReport) -> {
                                try {
                                    if (hasReceivedSlotOffers.isTriggered()) {
                                        assertThat(
                                                allocatedSlotReport.getAllocatedSlotInfos(),
                                                hasSize(1));
                                    } else {
                                        assertThat(
                                                allocatedSlotReport.getAllocatedSlotInfos(),
                                                empty());
                                    }
                                } catch (AssertionError e) {
                                    // forward the failure to the test thread instead of
                                    // throwing inside the heartbeat callback
                                    assertionFuture.completeExceptionally(e);
                                }

                                // has no effect if the future was already completed
                                // exceptionally above
                                if (terminateHeartbeatVerification.get()) {
                                    assertionFuture.complete(null);
                                }

                                return FutureUtils.completedVoidFuture();
                            })
                    .createTestingTaskExecutorGateway();

    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

    // NOTE(review): these shared services are built and shut down below but are not passed to
    // the JobMasterBuilder - confirm whether they were meant to be wired in.
    final JobManagerSharedServices jobManagerSharedServices =
            new TestingJobManagerSharedServicesBuilder().build();

    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();

    // the testing slot pool factory receives the latch that the heartbeat callback checks
    // (presumably triggered once slot offers reach the pool - confirm in TestingSlotPoolFactory)
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHeartbeatServices(new HeartbeatServices(5L, 1000L))
                    .withSlotPoolServiceSchedulerFactory(
                            DefaultSlotPoolServiceSchedulerFactory.create(
                                    new TestingSlotPoolFactory(hasReceivedSlotOffers),
                                    new DefaultSchedulerFactory()))
                    .createJobMaster();

    try {
        // start inside the try block so that the endpoint and the shared services are still
        // cleaned up if starting the JobMaster fails
        jobMaster.start();

        final JobMasterGateway jobMasterGateway =
                jobMaster.getSelfGateway(JobMasterGateway.class);

        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse =
                jobMasterGateway.registerTaskManager(
                        jobGraph.getJobID(),
                        TaskManagerRegistrationInformation.create(
                                taskExecutorGateway.getAddress(),
                                unresolvedTaskManagerLocation,
                                TestingUtils.zeroUUID()),
                        testingTimeout);

        // wait for the completion of the registration
        registrationResponse.get();

        final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);

        final CompletableFuture<Collection<SlotOffer>> slotOfferFuture =
                jobMasterGateway.offerSlots(
                        unresolvedTaskManagerLocation.getResourceID(),
                        Collections.singleton(slotOffer),
                        testingTimeout);

        assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));

        // from now on the next heartbeat completes the assertion future
        terminateHeartbeatVerification.set(true);

        // make sure that no assertion has been violated
        assertionFuture.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        jobManagerSharedServices.shutdown();
    }
}
Use of org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder in the Apache Flink project.
Class JobMasterTest, method testRestoringFromSavepoint.
/**
 * Verifies that a JobMaster restores the submitted JobGraph from its configured savepoint on
 * the initial submission.
 *
 * <p>The completed checkpoint store starts out empty; after a task has been submitted, the
 * latest checkpoint in the store must carry the savepoint's id.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {
    // write a savepoint and point the job's restore settings at it
    final long savepointId = 42L;
    final File savepointFile = createSavepoint(savepointId);
    final JobGraph jobGraph =
            createJobGraphWithCheckpointing(
                    SavepointRestoreSettings.forPath(savepointFile.getAbsolutePath(), true));

    // expose an initially empty store to the JobMaster through the HA services
    final StandaloneCompletedCheckpointStore checkpointStore =
            new StandaloneCompletedCheckpointStore(1);
    final CheckpointRecoveryFactory recoveryFactory =
            PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(
                    maxCheckpoints -> checkpointStore);
    haServices.setCheckpointRecoveryFactory(recoveryFactory);

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();

    try {
        // the adaptive scheduler only restores from the savepoint once the JobMaster is
        // running and the required slots have been registered
        jobMaster.start();

        final OneShotLatch taskSubmitted = new OneShotLatch();
        final JobMasterGateway jobMasterGateway =
                jobMaster.getSelfGateway(JobMasterGateway.class);
        final TestingTaskExecutorGateway taskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer(
                                (taskDeploymentDescriptor, jobMasterId) -> {
                                    taskSubmitted.trigger();
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                })
                        .createTestingTaskExecutorGateway();

        registerSlotsAtJobMaster(
                1,
                jobMasterGateway,
                jobGraph.getJobID(),
                taskExecutorGateway,
                new LocalUnresolvedTaskManagerLocation());

        // a submitted task guarantees that the ExecutionGraph has been created
        taskSubmitted.await();

        final CompletedCheckpoint restoredCheckpoint = checkpointStore.getLatestCheckpoint();
        assertThat(restoredCheckpoint, Matchers.notNullValue());
        assertThat(restoredCheckpoint.getCheckpointID(), is(savepointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Aggregations