use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
the class DeclarativeSlotManagerTest method testFreeSlot.
/**
 * Verifies that freeing an allocated slot puts it back into the {@code FREE} state and that the
 * slot manager afterwards counts it among its free slots.
 */
@Test
public void testFreeSlot() throws Exception {
    final DefaultSlotTracker tracker = new DefaultSlotTracker();
    final TaskExecutorConnection connection = createTaskExecutorConnection();
    final SlotID allocatedSlotId = new SlotID(connection.getResourceID(), 0);
    // the task executor initially reports its only slot as allocated
    final SlotReport initialReport = new SlotReport(createAllocatedSlotStatus(allocatedSlotId));

    try (DeclarativeSlotManager manager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotTracker(tracker)
                    .buildAndStartWithDirectExec()) {
        manager.registerTaskManager(
                connection, initialReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final DeclarativeTaskManagerSlot trackedSlot = tracker.getSlot(allocatedSlotId);
        assertSame(SlotState.ALLOCATED, trackedSlot.getState());

        manager.freeSlot(allocatedSlotId, new AllocationID());

        assertSame(SlotState.FREE, trackedSlot.getState());
        assertEquals(1, manager.getNumberFreeSlots());
    }
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotRequestFailure.
/**
 * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call
 * fails.
 *
 * <p>The task executor gateway records every slot request in a queue and answers it with the
 * next manually controlled future. The first response is failed with a {@code
 * SlotAllocationException}; the test then expects a second request for the same slot and job,
 * acknowledges it, and checks that the slot ends up allocated for that job.
 */
@Test
public void testSlotRequestFailure() throws Exception {
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotTracker(slotTracker)
                    .buildAndStartWithDirectExec()) {
        ResourceRequirements requirements = createResourceRequirementsForSingleSlot();
        slotManager.processResourceRequirements(requirements);

        final BlockingQueue<
                        Tuple6<
                                SlotID,
                                JobID,
                                AllocationID,
                                ResourceProfile,
                                String,
                                ResourceManagerId>>
                requestSlotQueue = new ArrayBlockingQueue<>(1);
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue =
                new ArrayBlockingQueue<>(2);

        // both responses are queued up front so the gateway can hand them out in order
        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);
        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);

        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setRequestSlotFunction(
                                requestSlotParameters -> {
                                    requestSlotQueue.offer(requestSlotParameters);
                                    try {
                                        return responseQueue.take();
                                    } catch (InterruptedException e) {
                                        // restore the interrupt flag so the calling thread
                                        // still observes the interruption, and keep the cause
                                        Thread.currentThread().interrupt();
                                        return FutureUtils.completedExceptionally(
                                                new FlinkException(
                                                        "Response queue was interrupted.", e));
                                    }
                                })
                        .createTestingTaskExecutorGateway();

        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection =
                new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport =
                new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));

        slotManager.registerTaskManager(
                taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                firstRequest = requestSlotQueue.take();

        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(
                new SlotAllocationException("Test exception"));

        // the retry must target the same job (f1) and the same slot (f0)
        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                secondRequest = requestSlotQueue.take();
        assertThat(secondRequest.f1, equalTo(firstRequest.f1));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));

        secondManualSlotRequestResponse.complete(Acknowledge.get());

        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(secondRequest.f1));
    }
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotRequestRemovedIfTMReportsAllocation.
/**
 * Tests that pending request is removed if task executor reports a slot with the same job id.
 *
 * <p>The first slot request is failed with a {@code TimeoutException}. The retried request is
 * failed with a {@code SlotOccupiedException} that names the requesting job. The test then
 * expects the slot to be tracked as allocated for that job and exactly one resource to be
 * recorded as acquired for it.
 */
@Test
public void testSlotRequestRemovedIfTMReportsAllocation() throws Exception {
    final ResourceTracker resourceTracker = new DefaultResourceTracker();
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setResourceTracker(resourceTracker)
                    .setSlotTracker(slotTracker)
                    .buildAndStartWithDirectExec()) {
        final JobID jobID = new JobID();
        slotManager.processResourceRequirements(createResourceRequirementsForSingleSlot(jobID));

        final BlockingQueue<
                        Tuple6<
                                SlotID,
                                JobID,
                                AllocationID,
                                ResourceProfile,
                                String,
                                ResourceManagerId>>
                requestSlotQueue = new ArrayBlockingQueue<>(1);
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue =
                new ArrayBlockingQueue<>(2);

        // both responses are queued up front so the gateway can hand them out in order
        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);
        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse =
                new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);

        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setRequestSlotFunction(
                                requestSlotParameters -> {
                                    requestSlotQueue.offer(requestSlotParameters);
                                    try {
                                        return responseQueue.take();
                                    } catch (InterruptedException e) {
                                        // restore the interrupt flag so the calling thread
                                        // still observes the interruption, and keep the cause
                                        Thread.currentThread().interrupt();
                                        return FutureUtils.completedExceptionally(
                                                new FlinkException(
                                                        "Response queue was interrupted.", e));
                                    }
                                })
                        .createTestingTaskExecutorGateway();

        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection =
                new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport =
                new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));

        slotManager.registerTaskManager(
                taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                firstRequest = requestSlotQueue.take();

        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(
                new TimeoutException("Test exception to fail first allocation"));

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>
                secondRequest = requestSlotQueue.take();

        // fail second request
        secondManualSlotRequestResponse.completeExceptionally(
                new SlotOccupiedException("Test exception", new AllocationID(), jobID));

        // both attempts were issued for the same job (f1) and the same slot (f0)
        assertThat(firstRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f1, equalTo(jobID));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));

        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(firstRequest.f1));

        assertThat(slotManager.getNumberRegisteredSlots(), is(1));
        assertThat(getTotalResourceCount(resourceTracker.getAcquiredResources(jobID)), is(1));
    }
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
the class DefaultSchedulerLocalRecoveryITCase method assertNonLocalRecoveredTasksEquals.
/**
 * Asserts how many restarted execution vertices are currently assigned an allocation that
 * differs from the allocation of their immediately preceding attempt.
 *
 * <p>Vertices whose current attempt number is 0 are skipped. For every other vertex both the
 * prior and the current allocation id are asserted to be non-null.
 *
 * @param graph archived execution graph whose execution vertices are inspected
 * @param expected expected number of vertices whose allocation changed between attempts
 */
private void assertNonLocalRecoveredTasksEquals(ArchivedExecutionGraph graph, int expected) {
    int changedAllocationCount = 0;
    for (ArchivedExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
        final int attemptNumber =
                executionVertex.getCurrentExecutionAttempt().getAttemptNumber();
        // attempt 0 means the task never restarted, so there is nothing to compare
        if (attemptNumber != 0) {
            final AllocationID previousAllocation =
                    executionVertex
                            .getPriorExecutionAttempt(attemptNumber - 1)
                            .getAssignedAllocationID();
            final AllocationID latestAllocation =
                    executionVertex.getCurrentExecutionAttempt().getAssignedAllocationID();
            assertNotNull(previousAllocation);
            assertNotNull(latestAllocation);
            if (!latestAllocation.equals(previousAllocation)) {
                changedAllocationCount += 1;
            }
        }
    }
    assertThat(changedAllocationCount, is(expected));
}
use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.
the class DefaultSlotTrackerTest method testSlotStatusProcessing.
/**
 * Feeds two slot reports into a {@link DefaultSlotTracker} holding three slots and checks the
 * resulting free slots: the tracker is asserted to return {@code true} for the first report and
 * {@code false} for the second, unchanged one.
 */
@Test
public void testSlotStatusProcessing() {
    final SlotTracker slotTracker = new DefaultSlotTracker();
    final SlotID firstSlot = new SlotID(TASK_EXECUTOR_CONNECTION.getResourceID(), 0);
    final SlotID secondSlot = new SlotID(TASK_EXECUTOR_CONNECTION.getResourceID(), 1);
    final SlotID thirdSlot = new SlotID(TASK_EXECUTOR_CONNECTION.getResourceID(), 2);

    // the first two slots are added without a job, the third one with the job id
    slotTracker.addSlot(firstSlot, ResourceProfile.ANY, TASK_EXECUTOR_CONNECTION, null);
    slotTracker.addSlot(secondSlot, ResourceProfile.ANY, TASK_EXECUTOR_CONNECTION, null);
    slotTracker.addSlot(thirdSlot, ResourceProfile.ANY, TASK_EXECUTOR_CONNECTION, jobId);

    assertThat(
            slotTracker.getFreeSlots(),
            containsInAnyOrder(
                    Arrays.asList(infoWithSlotId(firstSlot), infoWithSlotId(secondSlot))));

    // move the second slot to PENDING
    slotTracker.notifyAllocationStart(secondSlot, jobId);

    final SlotStatus firstSlotOccupied =
            new SlotStatus(firstSlot, ResourceProfile.ANY, jobId, new AllocationID());
    final SlotStatus secondSlotUnassigned =
            new SlotStatus(secondSlot, ResourceProfile.ANY, null, new AllocationID());
    final SlotStatus thirdSlotUnassigned =
            new SlotStatus(thirdSlot, ResourceProfile.ANY, null, new AllocationID());
    final List<SlotStatus> changingReport =
            Arrays.asList(firstSlotOccupied, secondSlotUnassigned, thirdSlotUnassigned);
    assertThat(slotTracker.notifySlotStatus(changingReport), is(true));

    // the first slot should now be allocated; the second slot should continue to be in a
    // pending state; the third slot should be freed
    assertThat(slotTracker.getFreeSlots(), contains(infoWithSlotId(thirdSlot)));

    // if the second slot is not in a pending state, this will fail with an exception
    slotTracker.notifyAllocationComplete(secondSlot, jobId);

    final SlotStatus firstSlotStillOccupied =
            new SlotStatus(firstSlot, ResourceProfile.ANY, jobId, new AllocationID());
    final SlotStatus secondSlotOccupied =
            new SlotStatus(secondSlot, ResourceProfile.ANY, jobId, new AllocationID());
    final SlotStatus thirdSlotStillUnassigned =
            new SlotStatus(thirdSlot, ResourceProfile.ANY, null, new AllocationID());
    final List<SlotStatus> unchangedReport =
            Arrays.asList(firstSlotStillOccupied, secondSlotOccupied, thirdSlotStillUnassigned);
    assertThat(slotTracker.notifySlotStatus(unchangedReport), is(false));
}
Aggregations