Example use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in the Apache Flink project.
From class JobMasterTest, method testJobMasterDisconnectsOldTaskExecutorIfNewSessionIsSeen:
/**
 * Registers two TaskExecutor gateways for the same task manager location, each with its own
 * random session id, and expects both registrations to succeed. Afterwards the test waits until
 * the disconnect callback installed on the first gateway has been invoked.
 */
@Test
public void testJobMasterDisconnectsOldTaskExecutorIfNewSessionIsSeen() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).createJobMaster();

    // Completed by the consumer passed to setDisconnectJobManagerConsumer on the first gateway.
    final CompletableFuture<Void> oldSessionDisconnected = new CompletableFuture<>();

    final TestingTaskExecutorGateway oldSessionGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setAddress("firstTaskExecutor")
                    .setDisconnectJobManagerConsumer(
                            (ignoredJobId, ignoredCause) -> {
                                oldSessionDisconnected.complete(null);
                            })
                    .createTestingTaskExecutorGateway();
    final TestingTaskExecutorGateway newSessionGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setAddress("secondTaskExecutor")
                    .createTestingTaskExecutorGateway();

    final String oldSessionAddress = oldSessionGateway.getAddress();
    final String newSessionAddress = newSessionGateway.getAddress();
    rpcService.registerGateway(oldSessionAddress, oldSessionGateway);
    rpcService.registerGateway(newSessionAddress, newSessionGateway);

    try {
        jobMaster.start();

        // Both registrations share this location; only the address and session id differ.
        final LocalUnresolvedTaskManagerLocation sharedLocation =
                new LocalUnresolvedTaskManagerLocation();

        final TaskManagerRegistrationInformation oldSessionInfo =
                TaskManagerRegistrationInformation.create(
                        oldSessionAddress, sharedLocation, UUID.randomUUID());
        final CompletableFuture<RegistrationResponse> oldSessionResponse =
                jobMaster.registerTaskManager(
                        jobGraph.getJobID(), oldSessionInfo, testingTimeout);
        assertThat(oldSessionResponse.get(), instanceOf(JMTMRegistrationSuccess.class));

        final TaskManagerRegistrationInformation newSessionInfo =
                TaskManagerRegistrationInformation.create(
                        newSessionAddress, sharedLocation, UUID.randomUUID());
        final CompletableFuture<RegistrationResponse> newSessionResponse =
                jobMaster.registerTaskManager(
                        jobGraph.getJobID(), newSessionInfo, testingTimeout);
        assertThat(newSessionResponse.get(), instanceOf(JMTMRegistrationSuccess.class));

        // Blocks until the first TaskExecutor has been told to disconnect.
        oldSessionDisconnected.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Example use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in the Apache Flink project.
From class JobMasterTest, method runJobFailureWhenTaskExecutorTerminatesTest:
/**
 * Shared driver for tests that expect the job to end up in {@link JobStatus#FAILED}.
 *
 * <p>It starts a JobMaster for a single no-op job, registers one slot through a testing
 * TaskExecutor gateway, reports the deployed task as INITIALIZING and then RUNNING, hands
 * control to {@code jobReachedRunningState}, and finally asserts on the state of the archived
 * execution graph delivered through the on-completion actions.
 *
 * @param heartbeatServices heartbeat services handed to the JobMaster builder
 * @param jobReachedRunningState callback invoked with the task manager location and the
 *     JobMaster gateway once the task has been reported as RUNNING
 * @param heartbeatConsumerFunction factory that, given the JobMaster gateway and the task
 *     manager's resource id, produces the heartbeat function installed on the testing gateway
 * @throws Exception if any step of the scenario fails
 */
private void runJobFailureWhenTaskExecutorTerminatesTest(HeartbeatServices heartbeatServices, BiConsumer<LocalUnresolvedTaskManagerLocation, JobMasterGateway> jobReachedRunningState, BiFunction<JobMasterGateway, ResourceID, BiFunction<ResourceID, AllocatedSlotReport, CompletableFuture<Void>>> heartbeatConsumerFunction) throws Exception {
    final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
    final JobMasterBuilder.TestingOnCompletionActions completionActions =
            new JobMasterBuilder.TestingOnCompletionActions();

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withResourceId(jmResourceId)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .withOnCompletionActions(completionActions)
                    .createJobMaster();

    try {
        jobMaster.start();

        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        final LocalUnresolvedTaskManagerLocation tmLocation =
                new LocalUnresolvedTaskManagerLocation();

        // Receives the attempt id of the task once it is submitted to the testing gateway.
        final CompletableFuture<ExecutionAttemptID> deployedAttempt = new CompletableFuture<>();

        final BiFunction<ResourceID, AllocatedSlotReport, CompletableFuture<Void>>
                heartbeatFunction =
                        heartbeatConsumerFunction.apply(gateway, tmLocation.getResourceID());

        final TestingTaskExecutorGateway taskExecutor =
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer(
                                (descriptor, ignoredJobMasterId) -> {
                                    deployedAttempt.complete(descriptor.getExecutionAttemptId());
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                })
                        .setHeartbeatJobManagerFunction(heartbeatFunction)
                        .createTestingTaskExecutorGateway();

        final Collection<SlotOffer> acceptedOffers =
                registerSlotsAtJobMaster(
                        1, gateway, jobGraph.getJobID(), taskExecutor, tmLocation);
        assertThat(acceptedOffers, hasSize(1));

        // Walk the deployed task through INITIALIZING and RUNNING before triggering the callback.
        final ExecutionAttemptID attemptId = deployedAttempt.get();
        gateway.updateTaskExecutionState(
                        new TaskExecutionState(attemptId, ExecutionState.INITIALIZING))
                .get();
        gateway.updateTaskExecutionState(
                        new TaskExecutionState(attemptId, ExecutionState.RUNNING))
                .get();

        jobReachedRunningState.accept(tmLocation, gateway);

        final ArchivedExecutionGraph finalGraph =
                completionActions
                        .getJobReachedGloballyTerminalStateFuture()
                        .get()
                        .getArchivedExecutionGraph();
        assertThat(finalGraph.getState(), is(JobStatus.FAILED));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Example use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in the Apache Flink project.
From class JobMasterTest, method testJobMasterAcknowledgesDuplicateTaskExecutorRegistrations:
/**
 * Sends the very same task manager registration to the JobMaster twice and expects each of the
 * two responses to be a {@link JMTMRegistrationSuccess}.
 */
@Test
public void testJobMasterAcknowledgesDuplicateTaskExecutorRegistrations() throws Exception {
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).createJobMaster();

    final TestingTaskExecutorGateway gateway =
            new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
    final String gatewayAddress = gateway.getAddress();
    rpcService.registerGateway(gatewayAddress, gateway);

    try {
        jobMaster.start();

        final TaskManagerRegistrationInformation registration =
                TaskManagerRegistrationInformation.create(
                        gatewayAddress,
                        new LocalUnresolvedTaskManagerLocation(),
                        UUID.randomUUID());

        // Issue both registrations before inspecting either response.
        final CompletableFuture<RegistrationResponse> initialResponse =
                jobMaster.registerTaskManager(
                        jobGraph.getJobID(), registration, testingTimeout);
        final CompletableFuture<RegistrationResponse> duplicateResponse =
                jobMaster.registerTaskManager(
                        jobGraph.getJobID(), registration, testingTimeout);

        assertThat(initialResponse.get(), instanceOf(JMTMRegistrationSuccess.class));
        assertThat(duplicateResponse.get(), instanceOf(JMTMRegistrationSuccess.class));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Example use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in the Apache Flink project.
From class JobMasterTest, method testAllocatedSlotReportDoesNotContainStaleInformation:
/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not stale
 * information about the allocated slots on the {@link JobMaster}.
 *
 * <p>Every heartbeat received by the testing gateway checks the report: it must be empty until
 * the slot-offer latch has been triggered and must contain exactly one slot afterwards. A
 * violated assertion is forwarded to the test thread through {@code assertionFuture}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without the fix
 * for FLINK-12863.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
    final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation =
            new LocalUnresolvedTaskManagerLocation();
    final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
    final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();

    final TestingTaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setHeartbeatJobManagerFunction(
                            (taskManagerId, allocatedSlotReport) -> {
                                try {
                                    if (hasReceivedSlotOffers.isTriggered()) {
                                        assertThat(
                                                allocatedSlotReport.getAllocatedSlotInfos(),
                                                hasSize(1));
                                    } else {
                                        assertThat(
                                                allocatedSlotReport.getAllocatedSlotInfos(),
                                                empty());
                                    }
                                } catch (AssertionError e) {
                                    // Surface the failure on the test thread instead of
                                    // losing it inside the heartbeat callback.
                                    assertionFuture.completeExceptionally(e);
                                }

                                // No-op if the future was already completed exceptionally.
                                if (terminateHeartbeatVerification.get()) {
                                    assertionFuture.complete(null);
                                }

                                return FutureUtils.completedVoidFuture();
                            })
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

    final JobManagerSharedServices jobManagerSharedServices =
            new TestingJobManagerSharedServicesBuilder().build();
    try {
        final JobGraph jobGraph = JobGraphTestUtils.singleNoOpJobGraph();
        final JobMaster jobMaster =
                new JobMasterBuilder(jobGraph, rpcService)
                        .withHeartbeatServices(new HeartbeatServices(5L, 1000L))
                        .withSlotPoolServiceSchedulerFactory(
                                DefaultSlotPoolServiceSchedulerFactory.create(
                                        new TestingSlotPoolFactory(hasReceivedSlotOffers),
                                        new DefaultSchedulerFactory()))
                        .createJobMaster();

        try {
            // Started inside the try block (as in the sibling tests) so that the endpoint is
            // terminated even if start-up itself fails.
            jobMaster.start();

            final JobMasterGateway jobMasterGateway =
                    jobMaster.getSelfGateway(JobMasterGateway.class);

            // register task manager will trigger monitor heartbeat target, schedule heartbeat
            // request at interval time
            CompletableFuture<RegistrationResponse> registrationResponse =
                    jobMasterGateway.registerTaskManager(
                            jobGraph.getJobID(),
                            TaskManagerRegistrationInformation.create(
                                    taskExecutorGateway.getAddress(),
                                    unresolvedTaskManagerLocation,
                                    TestingUtils.zeroUUID()),
                            testingTimeout);

            // wait for the completion of the registration
            registrationResponse.get();

            final SlotOffer slotOffer =
                    new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
            final CompletableFuture<Collection<SlotOffer>> slotOfferFuture =
                    jobMasterGateway.offerSlots(
                            unresolvedTaskManagerLocation.getResourceID(),
                            Collections.singleton(slotOffer),
                            testingTimeout);
            assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));

            // From now on the next heartbeat completes assertionFuture.
            terminateHeartbeatVerification.set(true);

            // make sure that no assertion has been violated
            assertionFuture.get();
        } finally {
            RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
        }
    } finally {
        // Separate finally block: the shared services are shut down even when creating,
        // running, or terminating the JobMaster throws.
        jobManagerSharedServices.shutdown();
    }
}
Example use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in the Apache Flink project.
From class JobMasterTest, method testRestoringFromSavepoint:
/**
 * Tests that a JobMaster will restore the given JobGraph from its savepoint upon initial
 * submission.
 *
 * <p>The check is made against the completed checkpoint store injected through the HA services:
 * after a task has been submitted, its latest checkpoint must carry the savepoint's id.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {
    // Write a savepoint to disk and point the job graph's restore settings at it.
    final long savepointId = 42L;
    final File savepoint = createSavepoint(savepointId);
    final String savepointPath = savepoint.getAbsolutePath();
    final SavepointRestoreSettings restoreSettings =
            SavepointRestoreSettings.forPath(savepointPath, true);
    final JobGraph jobGraph = createJobGraphWithCheckpointing(restoreSettings);

    // Make the HA services hand out a store this test can inspect afterwards.
    final StandaloneCompletedCheckpointStore checkpointStore =
            new StandaloneCompletedCheckpointStore(1);
    final CheckpointRecoveryFactory recoveryFactory =
            PerJobCheckpointRecoveryFactory.withoutCheckpointStoreRecovery(
                    ignoredMaxCheckpoints -> checkpointStore);
    haServices.setCheckpointRecoveryFactory(recoveryFactory);

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();

    try {
        // we need to start and register the required slots to let the adaptive scheduler
        // restore from the savepoint
        jobMaster.start();

        final OneShotLatch taskSubmitted = new OneShotLatch();
        final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        registerSlotsAtJobMaster(
                1,
                gateway,
                jobGraph.getJobID(),
                new TestingTaskExecutorGatewayBuilder()
                        .setSubmitTaskConsumer(
                                (ignoredDescriptor, ignoredJobMasterId) -> {
                                    taskSubmitted.trigger();
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                })
                        .createTestingTaskExecutorGateway(),
                new LocalUnresolvedTaskManagerLocation());

        // wait until a task has submitted because this guarantees that the ExecutionGraph has
        // been created
        taskSubmitted.await();

        final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();
        assertThat(restored, Matchers.notNullValue());
        assertThat(restored.getCheckpointID(), is(savepointId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Aggregations