use of org.apache.flink.runtime.resourcemanager.ResourceManagerId in project flink by apache.
the class DeclarativeSlotManagerTest method testDuplicateResourceRequirementDeclarationAfterSuccessfulAllocation.
/**
 * Verifies that re-declaring identical resource requirements, after the first declaration has
 * already led to a slot being allocated (and not yet freed), does not make the slot manager
 * request any additional resources.
 */
@Test
public void testDuplicateResourceRequirementDeclarationAfterSuccessfulAllocation() throws Exception {
    // counts every attempt of the slot manager to allocate a new resource
    final AtomicInteger allocationAttempts = new AtomicInteger(0);
    final ResourceActions resourceActions =
            new TestingResourceActionsBuilder()
                    .setAllocateResourceConsumer(unused -> allocationAttempts.incrementAndGet())
                    .build();
    final ResourceManagerId rmId = ResourceManagerId.generate();

    // a task executor that reports exactly one free slot on registration
    final ResourceID taskExecutorId = ResourceID.generate();
    final SlotID onlySlotId = new SlotID(taskExecutorId, 0);
    final TaskExecutorGateway gateway =
            new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
    final TaskExecutorConnection connection =
            new TaskExecutorConnection(taskExecutorId, gateway);
    final SlotReport initialReport = new SlotReport(createFreeSlotStatus(onlySlotId));

    final ResourceRequirements singleSlotRequirements = createResourceRequirementsForSingleSlot();
    final DefaultSlotTracker tracker = new DefaultSlotTracker();

    try (DeclarativeSlotManager manager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotTracker(tracker)
                    .buildAndStartWithDirectExec(rmId, resourceActions)) {
        manager.registerTaskManager(
                connection, initialReport, ResourceProfile.ANY, ResourceProfile.ANY);

        // first declaration: the registered slot is expected to end up allocated
        manager.processResourceRequirements(singleSlotRequirements);
        DeclarativeTaskManagerSlot trackedSlot = tracker.getSlot(onlySlotId);
        assertThat(trackedSlot.getState(), is(SlotState.ALLOCATED));

        // second, identical declaration while the slot is still allocated
        manager.processResourceRequirements(singleSlotRequirements);
    }

    // neither the initial declaration nor its duplicate may have triggered a resource
    // allocation
    assertThat(allocationAttempts.get(), is(0));
}
use of org.apache.flink.runtime.resourcemanager.ResourceManagerId in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotRequestRemovedIfTMReportsAllocation.
/**
 * Tests that a pending request is removed if the task executor reports a slot with the same job
 * id.
 *
 * <p>The first slot request is failed with a {@link TimeoutException}; the test then expects a
 * second request for the same slot and job, which is failed with a {@link
 * SlotOccupiedException} carrying the same job id. Afterwards the slot must be tracked as
 * allocated for that job and counted as an acquired resource.
 */
@Test
public void testSlotRequestRemovedIfTMReportsAllocation() throws Exception {
    final ResourceTracker resourceTracker = new DefaultResourceTracker();
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setResourceTracker(resourceTracker)
                    .setSlotTracker(slotTracker)
                    .buildAndStartWithDirectExec()) {
        final JobID jobID = new JobID();
        slotManager.processResourceRequirements(createResourceRequirementsForSingleSlot(jobID));

        // captures the slot requests that reach the task executor gateway
        final BlockingQueue<
                        Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>>
                requestSlotQueue = new ArrayBlockingQueue<>(1);

        // manually controlled responses, handed out in order to the incoming slot requests
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue =
                new ArrayBlockingQueue<>(2);
        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);
        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);

        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setRequestSlotFunction(
                                slotRequest -> {
                                    requestSlotQueue.offer(slotRequest);
                                    try {
                                        return responseQueue.take();
                                    } catch (InterruptedException e) {
                                        // restore the interrupt status so that callers
                                        // further up the stack can still observe it
                                        Thread.currentThread().interrupt();
                                        return FutureUtils.completedExceptionally(
                                                new FlinkException(
                                                        "Response queue was interrupted."));
                                    }
                                })
                        .createTestingTaskExecutorGateway();

        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection =
                new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport =
                new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));
        slotManager.registerTaskManager(
                taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                firstRequest = requestSlotQueue.take();
        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(
                new TimeoutException("Test exception to fail first allocation"));

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                secondRequest = requestSlotQueue.take();
        // fail second request, reporting the slot as occupied by the very same job
        secondManualSlotRequestResponse.completeExceptionally(
                new SlotOccupiedException("Test exception", new AllocationID(), jobID));

        // both requests must target the same job and the same slot
        assertThat(firstRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));

        // the slot is now tracked as allocated for the job and counted as acquired
        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(firstRequest.f1));
        assertThat(slotManager.getNumberRegisteredSlots(), is(1));
        assertThat(getTotalResourceCount(resourceTracker.getAcquiredResources(jobID)), is(1));
    }
}
use of org.apache.flink.runtime.resourcemanager.ResourceManagerId in project flink by apache.
the class DefaultSlotStatusSyncerTest method testAllocateSlot.
/**
 * Verifies the bookkeeping performed by {@code allocateSlot}: once the request has reached the
 * task executor, the resource is tracked as acquired for the job and the slot is tracked as
 * pending; acknowledging the request must not fail the returned future.
 */
@Test
public void testAllocateSlot() throws Exception {
    final JobID jobId = new JobID();
    final ResourceTracker resourceTracker = new DefaultResourceTracker();
    final FineGrainedTaskManagerTracker tmTracker = new FineGrainedTaskManagerTracker();

    // the gateway exposes the incoming request and answers with a manually completed future
    final CompletableFuture<Acknowledge> ackFuture = new CompletableFuture<>();
    final CompletableFuture<
                    Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>>
            receivedRequest = new CompletableFuture<>();
    final TestingTaskExecutorGateway gateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setRequestSlotFunction(
                            request -> {
                                receivedRequest.complete(request);
                                return ackFuture;
                            })
                    .createTestingTaskExecutorGateway();

    final TaskExecutorConnection connection =
            new TaskExecutorConnection(ResourceID.generate(), gateway);
    tmTracker.addTaskManager(connection, ResourceProfile.ANY, ResourceProfile.ANY);

    final SlotStatusSyncer syncer = new DefaultSlotStatusSyncer(TASK_MANAGER_REQUEST_TIMEOUT);
    syncer.initialize(
            tmTracker,
            resourceTracker,
            ResourceManagerId.generate(),
            TestingUtils.defaultExecutor());

    final CompletableFuture<Void> allocationResult =
            syncer.allocateSlot(
                    connection.getInstanceID(), jobId, "address", ResourceProfile.ANY);

    // the allocation id is taken from the request received by the gateway
    final AllocationID allocationId = receivedRequest.get().f2;

    assertThat(
            resourceTracker.getAcquiredResources(jobId),
            contains(ResourceRequirement.create(ResourceProfile.ANY, 1)));
    assertTrue(tmTracker.getAllocatedOrPendingSlot(allocationId).isPresent());
    assertThat(
            tmTracker.getAllocatedOrPendingSlot(allocationId).get().getJobId(),
            is(jobId));
    assertThat(
            tmTracker.getAllocatedOrPendingSlot(allocationId).get().getState(),
            is(SlotState.PENDING));

    // acknowledge the request; the allocation future must not have failed
    ackFuture.complete(Acknowledge.get());
    assertFalse(allocationResult.isCompletedExceptionally());
}
use of org.apache.flink.runtime.resourcemanager.ResourceManagerId in project flink by apache.
the class JobMaster method establishResourceManagerConnection.
/**
 * Completes the connection to the resource manager after a successful registration.
 *
 * <p>The registration response is only accepted if a resource manager connection exists and
 * its target leader id equals the id carried by the response; otherwise the response is
 * ignored with a debug log message. On acceptance the established connection is stored, the
 * slot pool service is connected to the resource manager gateway, and the heartbeat manager
 * starts monitoring the resource manager.
 *
 * @param success the registration response received from the resource manager
 */
private void establishResourceManagerConnection(final JobMasterRegistrationSuccess success) {
    final ResourceManagerId registeredLeaderId = success.getResourceManagerId();

    // verify the response with current connection
    final boolean matchesCurrentConnection =
            resourceManagerConnection != null
                    && Objects.equals(
                            resourceManagerConnection.getTargetLeaderId(), registeredLeaderId);
    if (!matchesCurrentConnection) {
        log.debug("Ignoring resource manager connection to {} because it's duplicated or outdated.", registeredLeaderId);
        return;
    }

    log.info("JobManager successfully registered at ResourceManager, leader id: {}.", registeredLeaderId);

    final ResourceManagerGateway gateway = resourceManagerConnection.getTargetGateway();
    final ResourceID rmResourceId = success.getResourceManagerResourceId();

    establishedResourceManagerConnection =
            new EstablishedResourceManagerConnection(gateway, rmResourceId);
    slotPoolService.connectToResourceManager(gateway);
    resourceManagerHeartbeatManager.monitorTarget(
            rmResourceId, new ResourceManagerHeartbeatReceiver(gateway));
}
use of org.apache.flink.runtime.resourcemanager.ResourceManagerId in project flink by apache.
the class TaskExecutorTest method testReleasingJobResources.
/**
 * Verifies that the job-scoped resources checked here (the changelog storages and the job
 * metrics group) stay available while the job still holds slots, and are gone once the last
 * slot has been freed.
 */
@Test
public void testReleasingJobResources() throws Exception {
    final AllocationID[] allocationIds =
            range(0, 5).mapToObj(i -> new AllocationID()).toArray(AllocationID[]::new);

    try (TaskExecutorTestingContext ctx = createTaskExecutorTestingContext(allocationIds.length)) {
        ctx.start();

        // register a resource manager and wait for the initial slot report to arrive
        final CompletableFuture<Tuple3<ResourceID, InstanceID, SlotReport>> firstSlotReport =
                new CompletableFuture<>();
        final ResourceManagerId rmId = createAndRegisterResourceManager(firstSlotReport);
        firstSlotReport.get();

        final TaskExecutorGateway taskExecutor =
                ctx.taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        // request one slot per allocation id
        for (int slotIndex = 0; slotIndex < allocationIds.length; slotIndex++) {
            requestSlot(
                    taskExecutor,
                    jobId,
                    allocationIds[slotIndex],
                    buildSlotID(slotIndex),
                    ResourceProfile.UNKNOWN,
                    ctx.jobMasterGateway.getAddress(),
                    rmId);
        }
        ctx.offerSlotsLatch.await();

        final ExecutionAttemptID attemptId =
                submit(allocationIds[0], ctx.jobMasterGateway, taskExecutor, BlockingNoOpInvokable.class);
        assertNotNull(ctx.changelogStoragesManager.getChangelogStoragesByJobId(jobId));
        assertNotNull(ctx.metricGroup.getJobMetricsGroup(jobId));

        // cancel tasks before releasing the slots - so that TM will release job resources on
        // the last slot release
        taskExecutor.cancelTask(attemptId, timeout).get();

        // poll until no task of the job is left
        // (taskSlotTable isn't thread safe - using MainThread)
        while (callInMain(ctx, () -> ctx.taskSlotTable.getTasks(jobId).hasNext())) {
            Thread.sleep(50);
        }

        // free the slots one by one; the job resources must be gone exactly after the last one
        final int lastIndex = allocationIds.length - 1;
        for (int slotIndex = 0; slotIndex < allocationIds.length; slotIndex++) {
            taskExecutor
                    .freeSlot(allocationIds[slotIndex], new RuntimeException("test exception"), timeout)
                    .get();
            boolean lastSlotFreed = slotIndex == lastIndex;
            assertEquals(
                    lastSlotFreed,
                    null == callInMain(ctx, () -> ctx.metricGroup.getJobMetricsGroup(jobId)));
            assertEquals(
                    lastSlotFreed,
                    null == callInMain(ctx, () -> ctx.changelogStoragesManager.getChangelogStoragesByJobId(jobId)));
        }
    }
}
Aggregations