Use of org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration in the Apache Flink project.
From the class TaskExecutorTest, method testTriggerRegistrationOnLeaderChange.
/**
 * Checks that the TaskExecutor registers at a ResourceManager each time a leader is announced:
 * first at the initially announced leader and, after that leader has been revoked, at the
 * subsequently announced one.
 */
@Test
public void testTriggerRegistrationOnLeaderChange() throws Exception {
    final UUID firstLeaderId = UUID.randomUUID();
    final UUID secondLeaderId = UUID.randomUUID();

    // each testing gateway hands the registration it receives over to its own future
    final CompletableFuture<TaskExecutorRegistration> firstRegistrationFuture =
            new CompletableFuture<>();
    final TestingResourceManagerGateway firstGateway =
            createRegistrationCapturingGateway(firstRegistrationFuture);

    final CompletableFuture<TaskExecutorRegistration> secondRegistrationFuture =
            new CompletableFuture<>();
    final TestingResourceManagerGateway secondGateway =
            createRegistrationCapturingGateway(secondRegistrationFuture);

    rpc.registerGateway(firstGateway.getAddress(), firstGateway);
    rpc.registerGateway(secondGateway.getAddress(), secondGateway);

    final TaskSlotTable<Task> slotTable =
            TestingTaskSlotTable.<Task>newBuilder()
                    .createSlotReportSupplier(SlotReport::new)
                    .closeAsyncReturns(CompletableFuture.completedFuture(null))
                    .build();
    final TaskExecutorLocalStateStoresManager stateStoresManager =
            createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
                    .setTaskSlotTable(slotTable)
                    .setTaskStateManager(stateStoresManager)
                    .build();

    final TaskExecutor taskExecutor = createTaskExecutor(services);

    try {
        taskExecutor.start();
        final String expectedAddress = taskExecutor.getAddress();

        // without an announced leader there must not be any connection
        assertNull(taskExecutor.getResourceManagerConnection());

        // announce the first leader and wait for the resulting registration
        resourceManagerLeaderRetriever.notifyListener(firstGateway.getAddress(), firstLeaderId);
        final TaskExecutorRegistration firstRegistration = firstRegistrationFuture.join();
        assertThat(firstRegistration.getTaskExecutorAddress(), is(expectedAddress));
        assertThat(
                firstRegistration.getResourceId(),
                is(unresolvedTaskManagerLocation.getResourceID()));
        assertNotNull(taskExecutor.getResourceManagerConnection());

        // revoke the current leader
        resourceManagerLeaderRetriever.notifyListener(null, null);

        // announce the second leader and wait for the registration at that gateway
        resourceManagerLeaderRetriever.notifyListener(secondGateway.getAddress(), secondLeaderId);
        final TaskExecutorRegistration secondRegistration = secondRegistrationFuture.join();
        assertThat(secondRegistration.getTaskExecutorAddress(), is(expectedAddress));
        assertThat(
                secondRegistration.getResourceId(),
                is(unresolvedTaskManagerLocation.getResourceID()));
        assertNotNull(taskExecutor.getResourceManagerConnection());
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}

/**
 * Creates a testing ResourceManager gateway that completes {@code registrationSink} with the
 * registration it receives and answers with the response built for that gateway.
 */
private TestingResourceManagerGateway createRegistrationCapturingGateway(
        CompletableFuture<TaskExecutorRegistration> registrationSink) {
    final TestingResourceManagerGateway gateway = new TestingResourceManagerGateway();
    gateway.setRegisterTaskExecutorFunction(
            registration -> {
                registrationSink.complete(registration);
                return createRegistrationResponse(gateway);
            });
    return gateway;
}
Use of org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration in the Apache Flink project.
From the class TaskExecutorTest, method runJobManagerHeartbeatTest.
/**
 * Runs a heartbeat scenario between a TaskExecutor and a testing JobMaster.
 *
 * <p>The TaskExecutor is started, connected to a testing ResourceManager and asked for a slot for
 * the job. After the JobMaster leader has been announced and the slot has been offered to it,
 * {@code heartbeatAction} is invoked. The method then expects the TaskExecutor to disconnect from
 * the JobMaster and to attempt a second registration at it.
 *
 * @param jmResourceId resource id the testing JobMaster returns in its registration success
 * @param heartbeatServices heartbeat services handed to the created TaskExecutor
 * @param jobMasterGatewayBuilderConsumer hook to customise the JobMaster gateway builder before
 *     the gateway is built
 * @param heartbeatAction action invoked with the TaskExecutor's resource id, the TaskExecutor
 *     gateway and the allocation id of the requested slot
 */
private void runJobManagerHeartbeatTest(
        ResourceID jmResourceId,
        HeartbeatServices heartbeatServices,
        Consumer<TestingJobMasterGatewayBuilder> jobMasterGatewayBuilderConsumer,
        TriConsumer<ResourceID, TaskExecutorGateway, AllocationID> heartbeatAction)
        throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final JobLeaderService jobLeaderService =
            new DefaultJobLeaderService(
                    unresolvedTaskManagerLocation,
                    RetryingRegistrationConfiguration.defaultConfiguration());

    final String jobMasterAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();

    // counts registrations at the JobMaster: the initial one plus the expected reconnect
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    final OneShotLatch slotOfferedLatch = new OneShotLatch();
    final CompletableFuture<ResourceID> disconnectTaskManagerFuture = new CompletableFuture<>();

    final TestingJobMasterGatewayBuilder testingJobMasterGatewayBuilder =
            new TestingJobMasterGatewayBuilder()
                    .setRegisterTaskManagerFunction(
                            (ignoredJobId, ignoredTaskManagerRegistrationInformation) -> {
                                registrationAttempts.countDown();
                                return CompletableFuture.completedFuture(
                                        new JMTMRegistrationSuccess(jmResourceId));
                            })
                    .setDisconnectTaskManagerFunction(
                            resourceID -> {
                                disconnectTaskManagerFuture.complete(resourceID);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                slotOfferedLatch.trigger();
                                return CompletableFuture.completedFuture(slotOffers);
                            });

    // let the caller adjust the gateway before it is built
    jobMasterGatewayBuilderConsumer.accept(testingJobMasterGatewayBuilder);
    final TestingJobMasterGateway jobMasterGateway = testingJobMasterGatewayBuilder.build();

    final TaskExecutorLocalStateStoresManager localStateStoresManager =
            createTaskExecutorLocalStateStoresManager();

    final TaskManagerServices taskManagerServices =
            new TaskManagerServicesBuilder()
                    .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
                    .setJobLeaderService(jobLeaderService)
                    .setTaskStateManager(localStateStoresManager)
                    .build();

    final TestingTaskExecutor taskManager =
            createTestingTaskExecutor(taskManagerServices, heartbeatServices);

    final OneShotLatch slotReportReceived = new OneShotLatch();
    final TestingResourceManagerGateway testingResourceManagerGateway =
            new TestingResourceManagerGateway();
    testingResourceManagerGateway.setSendSlotReportFunction(
            ignored -> {
                slotReportReceived.trigger();
                return CompletableFuture.completedFuture(Acknowledge.get());
            });

    // the first registration at the ResourceManager succeeds immediately; the second one is
    // answered with a future that is never completed within this method
    final Queue<CompletableFuture<RegistrationResponse>> registrationResponses =
            new ArrayDeque<>();
    registrationResponses.add(
            CompletableFuture.completedFuture(
                    new TaskExecutorRegistrationSuccess(
                            new InstanceID(),
                            testingResourceManagerGateway.getOwnResourceId(),
                            new ClusterInformation("foobar", 1234))));
    registrationResponses.add(new CompletableFuture<>());
    testingResourceManagerGateway.setRegisterTaskExecutorFunction(
            taskExecutorRegistration -> registrationResponses.poll());

    rpc.registerGateway(jobMasterAddress, jobMasterGateway);
    rpc.registerGateway(
            testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

    try {
        taskManager.start();
        taskManager.waitUntilStarted();

        final TaskExecutorGateway taskExecutorGateway =
                taskManager.getSelfGateway(TaskExecutorGateway.class);

        resourceManagerLeaderRetriever.notifyListener(
                testingResourceManagerGateway.getAddress(),
                testingResourceManagerGateway.getFencingToken().toUUID());

        // wait until the slot report has been sent to the ResourceManager
        slotReportReceived.await();

        final AllocationID allocationId = new AllocationID();
        requestSlot(
                taskExecutorGateway,
                jobId,
                allocationId,
                buildSlotID(0),
                ResourceProfile.UNKNOWN,
                jobMasterAddress,
                testingResourceManagerGateway.getFencingToken());

        // now inform the task manager about the new job leader
        jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jmLeaderId);

        // register task manager success will trigger monitoring heartbeat target between tm and
        // jm
        slotOfferedLatch.await();

        heartbeatAction.accept(
                unresolvedTaskManagerLocation.getResourceID(), taskExecutorGateway, allocationId);

        // the timeout should trigger disconnecting from the JobManager
        final ResourceID resourceID = disconnectTaskManagerFuture.get();
        assertThat(resourceID, equalTo(unresolvedTaskManagerLocation.getResourceID()));

        // the wait time is expressed in milliseconds, so the unit has to be MILLISECONDS;
        // pairing it with SECONDS would stretch the bound by a factor of 1000
        assertTrue(
                "The TaskExecutor should try to reconnect to the JM",
                registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Use of org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration in the Apache Flink project.
From the class TaskExecutor, method connectToResourceManager.
/**
 * Builds the registration describing this TaskExecutor and starts a new connection to the
 * ResourceManager found at {@code resourceManagerAddress}.
 *
 * <p>When assertions are enabled, this checks that an address is set and that neither a pending
 * nor an established ResourceManager connection exists.
 */
private void connectToResourceManager() {
    assert resourceManagerAddress != null;
    assert establishedResourceManagerConnection == null;
    assert resourceManagerConnection == null;

    log.info("Connecting to ResourceManager {}.", resourceManagerAddress);

    final String ownAddress = getAddress();
    final int dataPort = unresolvedTaskManagerLocation.getDataPort();
    // -1 is passed on when JMXService does not provide a port
    final int jmxPort = JMXService.getPort().orElse(-1);

    final TaskExecutorRegistration registration =
            new TaskExecutorRegistration(
                    ownAddress,
                    getResourceID(),
                    dataPort,
                    jmxPort,
                    hardwareDescription,
                    memoryConfiguration,
                    taskManagerConfiguration.getDefaultSlotResourceProfile(),
                    taskManagerConfiguration.getTotalResourceProfile());

    resourceManagerConnection =
            new TaskExecutorToResourceManagerConnection(
                    log,
                    getRpcService(),
                    taskManagerConfiguration.getRetryingRegistrationConfiguration(),
                    resourceManagerAddress.getAddress(),
                    resourceManagerAddress.getResourceManagerId(),
                    getMainThreadExecutor(),
                    new ResourceManagerRegistrationListener(),
                    registration);
    resourceManagerConnection.start();
}
Aggregations