use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method testReleaseOfJobResourcesIfJobMasterIsNotCorrect.
/**
 * Verifies that a TaskExecutor gives up every resource it holds for a job (slot and tracked
 * partitions) once the JobMaster it tries to register at rejects the registration because it
 * is not responsible for that job. See FLINK-21606.
 */
@Test
public void testReleaseOfJobResourcesIfJobMasterIsNotCorrect() throws Exception {
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
                    .build();

    // pretend that partitions are tracked for every job and record which job gets released
    final CompletableFuture<JobID> releasedPartitionsJob = new CompletableFuture<>();
    final TestingTaskExecutorPartitionTracker partitionTracker =
            new TestingTaskExecutorPartitionTracker();
    partitionTracker.setIsTrackingPartitionsForFunction(ignored -> true);
    partitionTracker.setStopTrackingAndReleaseAllPartitionsConsumer(
            releasedJobId -> releasedPartitionsJob.complete(releasedJobId));

    final TaskExecutor taskExecutor =
            createTaskExecutor(services, HEARTBEAT_SERVICES, partitionTracker);

    // a JobMaster gateway that answers every TaskManager registration with a rejection
    final TestingJobMasterGateway rejectingJobMaster =
            new TestingJobMasterGatewayBuilder()
                    .setRegisterTaskManagerFunction(
                            (ignoredJobId, ignoredTaskManagerRegistrationInformation) ->
                                    CompletableFuture.completedFuture(
                                            new JMTMRegistrationRejection("foobar")))
                    .build();
    rpc.registerGateway(rejectingJobMaster.getAddress(), rejectingJobMaster);

    final InstanceID instanceId = new InstanceID();
    final OneShotLatch registeredAtResourceManager = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> freedSlotFuture =
            new CompletableFuture<>();
    final TestingResourceManagerGateway rmGateway =
            createRmWithTmRegisterAndNotifySlotHooks(
                    instanceId, registeredAtResourceManager, freedSlotFuture);
    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    resourceManagerLeaderRetriever.notifyListener(
            rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

    try {
        taskExecutor.start();
        final TaskExecutorGateway selfGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        registeredAtResourceManager.await();

        final AllocationID requestedAllocation = new AllocationID();
        final SlotID requestedSlot = new SlotID(taskExecutor.getResourceID(), 0);
        requestSlot(
                selfGateway,
                jobId,
                requestedAllocation,
                requestedSlot,
                ResourceProfile.UNKNOWN,
                rejectingJobMaster.getAddress(),
                rmGateway.getFencingToken());

        // announcing the JobMaster leader makes the TaskExecutor register there; the
        // rejection is expected to make it release everything it holds for the job
        jobManagerLeaderRetriever.notifyListener(
                rejectingJobMaster.getAddress(),
                rejectingJobMaster.getFencingToken().toUUID());

        // the slot must be reported back as available
        final Tuple3<InstanceID, SlotID, AllocationID> freedSlot = freedSlotFuture.get();
        assertThat(freedSlot.f1, is(requestedSlot));
        assertThat(freedSlot.f2, is(requestedAllocation));

        // and the partitions of the job must be released
        assertThat(releasedPartitionsJob.get(), is(jobId));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class JobVertexThreadInfoTrackerTest method createMockResourceManagerGateway.
/**
 * Creates an already completed future holding a {@link TestingResourceManagerGateway} whose
 * task executor gateway lookup always answers with a completed future containing
 * {@code null}.
 *
 * @return completed future with the prepared testing gateway
 */
private static CompletableFuture<ResourceManagerGateway> createMockResourceManagerGateway() {
    final TestingResourceManagerGateway gateway = new TestingResourceManagerGateway();

    // ignored in TestingThreadInfoRequestCoordinator
    final Function<ResourceID, CompletableFuture<TaskExecutorThreadInfoGateway>>
            nullGatewayLookup = (ignoredResourceId) -> CompletableFuture.completedFuture(null);
    gateway.setRequestTaskExecutorGatewayFunction(nullGatewayLookup);

    return CompletableFuture.completedFuture(gateway);
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class JobMasterTest method testResourceManagerBecomesUnreachableTriggersDisconnect.
/**
 * Verifies that the JobMaster disconnects from a ResourceManager whose heartbeat responses
 * fail with a {@link RecipientUnreachableException}, and that it afterwards attempts to
 * register at the ResourceManager again.
 */
@Test
public void testResourceManagerBecomesUnreachableTriggersDisconnect() throws Exception {
    final String rmAddress = "rm";
    final ResourceManagerId rmId = ResourceManagerId.generate();
    final ResourceID rmResourceId = new ResourceID(rmAddress);
    final TestingResourceManagerGateway rmGateway =
            new TestingResourceManagerGateway(rmId, rmResourceId, rmAddress, "localhost");

    // records the job id the JobMaster passes when disconnecting from the RM
    final CompletableFuture<JobID> disconnectedJobFuture = new CompletableFuture<>();
    rmGateway.setDisconnectJobManagerConsumer(
            disconnect -> disconnectedJobFuture.complete(disconnect.f0));

    // every heartbeat towards the RM fails as if the RM could not be reached
    rmGateway.setJobMasterHeartbeatFunction(
            ignored ->
                    FutureUtils.completedExceptionally(
                            new RecipientUnreachableException(
                                    "sender",
                                    "recipient",
                                    "resource manager is unreachable")));

    // first registration succeeds right away, the second one stays pending
    final CountDownLatch expectedRegistrations = new CountDownLatch(2);
    final Queue<CompletableFuture<RegistrationResponse>> registrationAnswers =
            new ArrayDeque<>(2);
    registrationAnswers.add(
            CompletableFuture.completedFuture(rmGateway.getJobMasterRegistrationSuccess()));
    registrationAnswers.add(new CompletableFuture<>());
    rmGateway.setRegisterJobManagerFunction(
            (ignoredJobMasterId, ignoredResourceId, ignoredAddress, ignoredJobId) -> {
                expectedRegistrations.countDown();
                return registrationAnswers.poll();
            });

    rpcService.registerGateway(rmAddress, rmGateway);

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withResourceId(jmResourceId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();
    jobMaster.start();

    try {
        // announce the RM leader so that the JobMaster registers at it
        rmLeaderRetrievalService.notifyListener(rmAddress, rmId.toUUID());

        final JobMasterGateway selfGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

        // keep sending RM heartbeats until the JobMaster has disconnected
        CommonTestUtils.waitUntilCondition(
                () -> {
                    selfGateway.heartbeatFromResourceManager(rmResourceId);
                    return disconnectedJobFuture.isDone();
                },
                Deadline.fromNow(TimeUtils.toDuration(testingTimeout)),
                50L);

        // the unreachable RM must have been disconnected for this job
        assertThat(disconnectedJobFuture.join(), equalTo(jobGraph.getJobID()));

        // the JobMaster should try to reconnect to the RM
        expectedRegistrations.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class JobMasterTest method createAndRegisterTestingResourceManagerGateway.
/**
 * Instantiates a default {@link TestingResourceManagerGateway} and registers it at the test's
 * RPC service under the gateway's own address.
 *
 * @return the registered gateway
 */
@Nonnull
private TestingResourceManagerGateway createAndRegisterTestingResourceManagerGateway() {
    final TestingResourceManagerGateway gateway = new TestingResourceManagerGateway();
    final String gatewayAddress = gateway.getAddress();
    rpcService.registerGateway(gatewayAddress, gateway);
    return gateway;
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class SlotPoolBatchSlotRequestTest method testPendingBatchSlotRequestDoesNotFailIfResourceDeclaringFails.
/**
 * Verifies that a pending batch slot request stays pending when declaring the required
 * resources at the resource manager fails with an exception other than {@link
 * UnfulfillableSlotRequestException}.
 */
@Test
public void testPendingBatchSlotRequestDoesNotFailIfResourceDeclaringFails() throws Exception {
    final Time batchSlotTimeout = Time.milliseconds(1000L);

    // every resource declaration is answered with a generic failure
    final TestingResourceManagerGateway failingRmGateway = new TestingResourceManagerGateway();
    failingRmGateway.setDeclareRequiredResourcesFunction(
            (ignoredJobMasterId, ignoredRequirements) ->
                    FutureUtils.completedExceptionally(new FlinkException("Failed request")));

    try (final SlotPool pool =
            createAndSetUpSlotPool(mainThreadExecutor, failingRmGateway, batchSlotTimeout)) {
        final CompletableFuture<PhysicalSlot> pendingSlot =
                SlotPoolUtils.requestNewAllocatedBatchSlot(
                        pool, mainThreadExecutor, resourceProfile);

        // the request must neither complete nor fail within the observation window
        assertThat(pendingSlot, FlinkMatchers.willNotComplete(Duration.ofMillis(50L)));
    }
}
Aggregations