Use of org.apache.flink.util.function.TriConsumer in project flink by apache.
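For context, org.apache.flink.util.function.TriConsumer is Flink's three-argument counterpart to java.util.function.BiConsumer: a functional interface with a single void method accept(S, T, U). A minimal sketch of that shape (the names below are illustrative only, not taken from the listings on this page):

import org.apache.flink.util.function.TriConsumer;

// a three-argument callback; the tests below accept one as a parameter so the caller
// decides what happens with the values handed to it
TriConsumer<String, Integer, Boolean> report =
        (name, attempts, reconnected) ->
                System.out.printf("%s: %d attempts, reconnected=%b%n", name, attempts, reconnected);

report.accept("heartbeat", 2, true);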
From the class TaskExecutorTest, the method runJobManagerHeartbeatTest.
private void runJobManagerHeartbeatTest(
        ResourceID jmResourceId,
        HeartbeatServices heartbeatServices,
        Consumer<TestingJobMasterGatewayBuilder> jobMasterGatewayBuilderConsumer,
        TriConsumer<ResourceID, TaskExecutorGateway, AllocationID> heartbeatAction)
        throws IOException, InterruptedException, ExecutionException, TimeoutException {
final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
final String jobMasterAddress = "jm";
final UUID jmLeaderId = UUID.randomUUID();
final CountDownLatch registrationAttempts = new CountDownLatch(2);
final OneShotLatch slotOfferedLatch = new OneShotLatch();
final CompletableFuture<ResourceID> disconnectTaskManagerFuture = new CompletableFuture<>();
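// JobMaster gateway stub: counts registration attempts (checked by the reconnect assertion
// below), completes disconnectTaskManagerFuture on disconnect, and trips slotOfferedLatch
// when slots are offered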
final TestingJobMasterGatewayBuilder testingJobMasterGatewayBuilder = new TestingJobMasterGatewayBuilder()
        .setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> {
            registrationAttempts.countDown();
            return CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jmResourceId));
        })
        .setDisconnectTaskManagerFunction(resourceID -> {
            disconnectTaskManagerFuture.complete(resourceID);
            return CompletableFuture.completedFuture(Acknowledge.get());
        })
        .setOfferSlotsFunction((resourceID, slotOffers) -> {
            slotOfferedLatch.trigger();
            return CompletableFuture.completedFuture(slotOffers);
        });
jobMasterGatewayBuilderConsumer.accept(testingJobMasterGatewayBuilder);
final TestingJobMasterGateway jobMasterGateway = testingJobMasterGatewayBuilder.build();
final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
        .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
        .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
        .setJobLeaderService(jobLeaderService)
        .setTaskStateManager(localStateStoresManager)
        .build();
final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices, heartbeatServices);
final OneShotLatch slotReportReceived = new OneShotLatch();
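// ResourceManager stub that acknowledges the TM's slot report and signals slotReportReceived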
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
testingResourceManagerGateway.setSendSlotReportFunction(ignored -> {
slotReportReceived.trigger();
return CompletableFuture.completedFuture(Acknowledge.get());
});
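// the first TaskExecutor-to-ResourceManager registration succeeds; a second attempt, if any,
// is left pending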
final Queue<CompletableFuture<RegistrationResponse>> registrationResponses = new ArrayDeque<>();
registrationResponses.add(
        CompletableFuture.completedFuture(
                new TaskExecutorRegistrationSuccess(
                        new InstanceID(),
                        testingResourceManagerGateway.getOwnResourceId(),
                        new ClusterInformation("foobar", 1234))));
registrationResponses.add(new CompletableFuture<>());
testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> registrationResponses.poll());
rpc.registerGateway(jobMasterAddress, jobMasterGateway);
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
try {
taskManager.start();
taskManager.waitUntilStarted();
final TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
slotReportReceived.await();
final AllocationID allocationId = new AllocationID();
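// request a slot for the job; once the job leader is announced below, the TM offers the slot to the JM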
requestSlot(
        taskExecutorGateway,
        jobId,
        allocationId,
        buildSlotID(0),
        ResourceProfile.UNKNOWN,
        jobMasterAddress,
        testingResourceManagerGateway.getFencingToken());
// now inform the task manager about the new job leader
jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jmLeaderId);
// a successful TaskManager registration triggers heartbeat monitoring between the TM and the JM
slotOfferedLatch.await();
heartbeatAction.accept(unresolvedTaskManagerLocation.getResourceID(), taskExecutorGateway, allocationId);
// the timeout should trigger disconnecting from the JobManager
final ResourceID resourceID = disconnectTaskManagerFuture.get();
assertThat(resourceID, equalTo(unresolvedTaskManagerLocation.getResourceID()));
assertTrue("The TaskExecutor should try to reconnect to the JM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.SECONDS));
} finally {
RpcUtils.terminateRpcEndpoint(taskManager, timeout);
}
}
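A hypothetical caller of this helper (not shown on this page; the heartbeat interval/timeout values and the empty builder customizer are assumptions for illustration) could look like this: with a very short heartbeat timeout and a heartbeatAction that does nothing, the JobManager heartbeat expires and the disconnect/reconnect assertions above should fire.

runJobManagerHeartbeatTest(
        ResourceID.generate(),                  // jmResourceId (illustrative)
        new HeartbeatServices(1L, 3L),          // fast heartbeat interval/timeout (illustrative)
        builder -> {},                          // no extra TestingJobMasterGatewayBuilder customization
        (taskManagerResourceId, taskExecutorGateway, allocationId) -> {
            // deliberately ignore the heartbeat so it times out
        });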
Use of org.apache.flink.util.function.TriConsumer in project flink by apache.
From the class TaskExecutorPartitionLifecycleTest, the method testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion.
private void testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(
        TriConsumer<TaskExecutorGateway, JobID, ResultPartitionID> releaseOrPromoteCall)
        throws Exception {
final CompletableFuture<Void> disconnectFuture = new CompletableFuture<>();
final JobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setDisconnectTaskManagerFunction(resourceID -> {
            disconnectFuture.complete(null);
            return CompletableFuture.completedFuture(Acknowledge.get());
        })
        .build();
final DefaultJobTable jobTable = DefaultJobTable.create();
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
        .setJobTable(jobTable)
        .setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
        .setTaskSlotTable(createTaskSlotTable())
        .build();
final TestingTaskExecutorPartitionTracker partitionTracker = new TestingTaskExecutorPartitionTracker();
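// the test flips this flag to control whether the tracker reports partitions as still being
// tracked for the job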
final AtomicBoolean trackerIsTrackingPartitions = new AtomicBoolean(false);
partitionTracker.setIsTrackingPartitionsForFunction(jobId -> trackerIsTrackingPartitions.get());
final CompletableFuture<Collection<ResultPartitionID>> firstReleasePartitionsCallFuture = new CompletableFuture<>();
partitionTracker.setStopTrackingAndReleasePartitionsConsumer(firstReleasePartitionsCallFuture::complete);
final ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor = PartitionTestUtils.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING);
final ResultPartitionID resultPartitionId = resultPartitionDeploymentDescriptor.getShuffleDescriptor().getResultPartitionID();
final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices, partitionTracker);
try {
taskExecutor.start();
taskExecutor.waitUntilStarted();
TaskSubmissionTestEnvironment.registerJobMasterConnection(
        jobTable,
        jobId,
        rpc,
        jobMasterGateway,
        new NoOpTaskManagerActions(),
        timeout,
        taskExecutor.getMainThreadExecutableForTesting());
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
trackerIsTrackingPartitions.set(true);
assertThat(firstReleasePartitionsCallFuture.isDone(), is(false));
taskExecutorGateway.releaseOrPromotePartitions(jobId, Collections.singleton(new ResultPartitionID()), Collections.emptySet());
// at this point we only know that the TE has entered releasePartitions; we cannot be
// certain whether it has already checked whether it should disconnect or not
firstReleasePartitionsCallFuture.get();
// connection should be kept alive since the table still contains partitions
assertThat(disconnectFuture.isDone(), is(false));
trackerIsTrackingPartitions.set(false);
// the TM should check whether partitions are still stored, and afterwards terminate the
// connection
releaseOrPromoteCall.accept(taskExecutorGateway, jobId, resultPartitionId);
disconnectFuture.get();
} finally {
RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
}
}
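Hypothetical callers (not part of the listing above) would supply a TriConsumer that performs the external release or the external promotion through the gateway, for example as follows; the argument order mirrors the call inside the listing, where the first set is released and the second promoted:

// external release: the partition is passed in the to-release set
testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(
        (taskExecutorGateway, jobID, resultPartitionID) ->
                taskExecutorGateway.releaseOrPromotePartitions(
                        jobID, Collections.singleton(resultPartitionID), Collections.emptySet()));

// external promotion: the partition is passed in the to-promote set
testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(
        (taskExecutorGateway, jobID, resultPartitionID) ->
                taskExecutorGateway.releaseOrPromotePartitions(
                        jobID, Collections.emptySet(), Collections.singleton(resultPartitionID)));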