use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in project flink by apache.
the class DeclarativeSlotManagerTest method testNotificationAboutNotEnoughResources.
/**
 * Checks that the slot manager reports a job whose resource requirements cannot be fulfilled.
 *
 * <p>One task executor offering a single slot is registered while the job requires three, and
 * the resource actions' allocate function always returns {@code false}. The test expects
 * exactly one "not enough resources" notification for the job, and no additional one after an
 * unchanged slot report.
 *
 * @param withNotificationGracePeriod if {@code true}, failing of unfulfillable requests is
 *     switched off before the requirements are declared and switched back on afterwards; no
 *     notification is expected until it is switched back on
 * @throws Exception if the slot manager cannot be set up or closed
 */
private static void testNotificationAboutNotEnoughResources(boolean withNotificationGracePeriod) throws Exception {
    final JobID jobId = new JobID();
    final int requiredSlotCount = 3;
    final int existingSlotCount = 1;

    // collects every (job, acquired resources) pair the slot manager reports as unfulfillable
    final List<Tuple2<JobID, Collection<ResourceRequirement>>> receivedNotifications = new ArrayList<>();

    final ResourceActions resourceActions =
            new TestingResourceActionsBuilder()
                    .setAllocateResourceFunction(ignored -> false)
                    .setNotEnoughResourcesConsumer(
                            (notifiedJobId, acquiredResources) ->
                                    receivedNotifications.add(Tuple2.of(notifiedJobId, acquiredResources)))
                    .build();

    try (DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .buildAndStart(
                            ResourceManagerId.generate(),
                            new ManuallyTriggeredScheduledExecutor(),
                            resourceActions)) {

        if (withNotificationGracePeriod) {
            // this should disable notifications
            slotManager.setFailUnfulfillableRequest(false);
        }

        final ResourceID taskExecutorId = ResourceID.generate();
        final TaskExecutorConnection taskExecutorConnection =
                new TaskExecutorConnection(
                        taskExecutorId,
                        new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());
        final SlotReport initialSlotReport = createSlotReport(taskExecutorId, existingSlotCount);

        slotManager.registerTaskManager(
                taskExecutorConnection, initialSlotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final ResourceRequirements requirements = createResourceRequirements(jobId, requiredSlotCount);
        slotManager.processResourceRequirements(requirements);

        if (withNotificationGracePeriod) {
            assertThat(receivedNotifications, empty());

            // re-enable notifications which should also trigger another resource check
            slotManager.setFailUnfulfillableRequest(true);
        }

        assertThat(receivedNotifications, hasSize(1));

        final Tuple2<JobID, Collection<ResourceRequirement>> firstNotification = receivedNotifications.get(0);
        assertThat(firstNotification.f0, is(jobId));
        assertThat(
                firstNotification.f1,
                hasItem(ResourceRequirement.create(ResourceProfile.ANY, existingSlotCount)));

        // another slot report that does not indicate any changes should not trigger another
        // notification
        slotManager.reportSlotStatus(taskExecutorConnection.getInstanceID(), initialSlotReport);
        assertThat(receivedNotifications, hasSize(1));
    }
}
use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in project flink by apache.
the class DeclarativeSlotManagerTest method testTaskManagerRegistration.
/**
 * Tests that a task manager and the slots it reports can be registered at the slot manager.
 *
 * <p>A task executor reporting two free slots is registered; afterwards the slot manager must
 * count two registered slots and the slot tracker must know both slot ids.
 */
@Test
public void testTaskManagerRegistration() throws Exception {
    final DefaultSlotTracker tracker = new DefaultSlotTracker();

    final TaskExecutorGateway gateway =
            new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
    final ResourceID taskManagerId = ResourceID.generate();
    final TaskExecutorConnection connection = new TaskExecutorConnection(taskManagerId, gateway);

    final SlotID firstSlotId = new SlotID(taskManagerId, 0);
    final SlotID secondSlotId = new SlotID(taskManagerId, 1);
    final SlotReport twoFreeSlots =
            new SlotReport(
                    Arrays.asList(
                            createFreeSlotStatus(firstSlotId),
                            createFreeSlotStatus(secondSlotId)));

    try (DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotTracker(tracker)
                    .buildAndStartWithDirectExec()) {

        slotManager.registerTaskManager(
                connection, twoFreeSlots, ResourceProfile.ANY, ResourceProfile.ANY);

        assertThat(
                "The number registered slots does not equal the expected number.",
                slotManager.getNumberRegisteredSlots(),
                is(2));

        // both reported slots must be known to the tracker handed to the slot manager
        for (SlotID reportedSlotId : Arrays.asList(firstSlotId, secondSlotId)) {
            assertNotNull(tracker.getSlot(reportedSlotId));
        }
    }
}
use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in project flink by apache.
the class DeclarativeSlotManagerTest method testSlotRequestFailure.
/**
 * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call
 * fails.
 *
 * <p>The testing gateway records every slot request in a queue and answers it with the next
 * manually controlled future. The first future is failed with a {@code
 * SlotAllocationException}; the test then expects a second request for the same slot and job,
 * acknowledges it, and verifies that the slot ends up allocated for that job.
 */
@Test
public void testSlotRequestFailure() throws Exception {
    final DefaultSlotTracker slotTracker = new DefaultSlotTracker();
    try (final DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotTracker(slotTracker)
                    .buildAndStartWithDirectExec()) {

        ResourceRequirements requirements = createResourceRequirementsForSingleSlot();
        slotManager.processResourceRequirements(requirements);

        // requests issued by the slot manager, in the order the gateway received them
        final BlockingQueue<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>>
                requestSlotQueue = new ArrayBlockingQueue<>(1);
        // pre-populated responses; the n-th request is answered with the n-th future
        final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);

        final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>();
        responseQueue.offer(firstManualSlotRequestResponse);
        final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>();
        responseQueue.offer(secondManualSlotRequestResponse);

        final TestingTaskExecutorGateway testingTaskExecutorGateway =
                new TestingTaskExecutorGatewayBuilder()
                        .setRequestSlotFunction(
                                slotRequest -> {
                                    requestSlotQueue.offer(slotRequest);
                                    try {
                                        return responseQueue.take();
                                    } catch (InterruptedException e) {
                                        // restore the interrupt flag so the interruption is not
                                        // silently lost for the calling thread, and keep the
                                        // cause attached to the failure that is reported back
                                        Thread.currentThread().interrupt();
                                        return FutureUtils.completedExceptionally(
                                                new FlinkException("Response queue was interrupted.", e));
                                    }
                                })
                        .createTestingTaskExecutorGateway();

        final ResourceID taskExecutorResourceId = ResourceID.generate();
        final TaskExecutorConnection taskExecutionConnection =
                new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
        final SlotReport slotReport =
                new SlotReport(createFreeSlotStatus(new SlotID(taskExecutorResourceId, 0)));

        slotManager.registerTaskManager(
                taskExecutionConnection, slotReport, ResourceProfile.ANY, ResourceProfile.ANY);

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> firstRequest =
                requestSlotQueue.take();

        // fail first request
        firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception"));

        final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> secondRequest =
                requestSlotQueue.take();

        // the retry must target the same job (f1) and the same slot (f0) as the failed request
        assertThat(secondRequest.f1, equalTo(firstRequest.f1));
        assertThat(secondRequest.f0, equalTo(firstRequest.f0));

        secondManualSlotRequestResponse.complete(Acknowledge.get());

        final DeclarativeTaskManagerSlot slot = slotTracker.getSlot(secondRequest.f0);
        assertThat(slot.getState(), equalTo(SlotState.ALLOCATED));
        assertThat(slot.getJobId(), equalTo(secondRequest.f1));
    }
}
use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in project flink by apache.
the class ResourceManagerTest method testHeartbeatTimeoutWithTaskExecutor.
/**
 * Runs the heartbeat timeout scenario against a registered task executor.
 *
 * <p>The verification step expects the task executor to be disconnected with a {@code
 * TimeoutException} and the stop-worker function to be invoked with the task executor's
 * resource id. A heartbeat request may or may not have been observed by then.
 */
@Test
public void testHeartbeatTimeoutWithTaskExecutor() throws Exception {
    final ResourceID taskExecutorId = ResourceID.generate();

    final CompletableFuture<ResourceID> heartbeatOriginFuture = new CompletableFuture<>();
    final CompletableFuture<Exception> disconnectCauseFuture = new CompletableFuture<>();
    final CompletableFuture<ResourceID> stoppedWorkerFuture = new CompletableFuture<>();

    final TaskExecutorGateway taskExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setDisconnectResourceManagerConsumer(
                            cause -> disconnectCauseFuture.complete(cause))
                    .setHeartbeatResourceManagerFunction(
                            heartbeatOrigin -> {
                                heartbeatOriginFuture.complete(heartbeatOrigin);
                                return FutureUtils.completedVoidFuture();
                            })
                    .createTestingTaskExecutorGateway();

    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

    runHeartbeatTimeoutTest(
            builder ->
                    builder.withStopWorkerFunction(
                            stoppedWorker -> {
                                stoppedWorkerFuture.complete(stoppedWorker);
                                return true;
                            }),
            resourceManagerGateway -> {
                registerTaskExecutor(
                        resourceManagerGateway, taskExecutorId, taskExecutorGateway.getAddress());
            },
            resourceManagerResourceId -> {
                // might have been completed or not depending whether the timeout was triggered
                // first
                final ResourceID observedHeartbeatOrigin = heartbeatOriginFuture.getNow(null);
                assertThat(
                        observedHeartbeatOrigin,
                        anyOf(is(resourceManagerResourceId), is(nullValue())));

                assertThat(disconnectCauseFuture.get(), instanceOf(TimeoutException.class));
                assertThat(stoppedWorkerFuture.get(), is(taskExecutorId));
            });
}
use of org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder in project flink by apache.
the class ActiveResourceManagerTest method testCloseTaskManagerConnectionOnWorkerTerminated.
/**
 * Verifies that the task executor is told to disconnect from the resource manager once its
 * worker is reported as terminated.
 *
 * <p>A worker is requested, a task executor is registered under the resource id handed out by
 * the driver, and the worker is then reported as terminated. The test waits, with a timeout,
 * for the gateway's disconnect callback to fire.
 */
@Test
public void testCloseTaskManagerConnectionOnWorkerTerminated() throws Exception {
    new Context() {
        {
            final ResourceID workerResourceId = ResourceID.generate();

            final CompletableFuture<TaskExecutorProcessSpec> requestedWorkerSpecFuture =
                    new CompletableFuture<>();
            final CompletableFuture<Void> disconnectedFuture = new CompletableFuture<>();

            final TestingTaskExecutorGateway taskExecutorGateway =
                    new TestingTaskExecutorGatewayBuilder()
                            .setDisconnectResourceManagerConsumer(
                                    cause -> disconnectedFuture.complete(null))
                            .createTestingTaskExecutorGateway();

            // the driver records the requested spec and immediately hands out the worker id
            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        requestedWorkerSpecFuture.complete(requestedSpec);
                        return CompletableFuture.completedFuture(workerResourceId);
                    });

            runTest(
                    () -> {
                        // request a new worker, terminate it after registered
                        runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC))
                                .thenCompose(
                                        workerStarted ->
                                                registerTaskExecutor(workerResourceId, taskExecutorGateway))
                                .thenRun(
                                        () ->
                                                runInMainThread(
                                                        () ->
                                                                getResourceManager()
                                                                        .onWorkerTerminated(
                                                                                workerResourceId,
                                                                                "terminate for testing")));

                        // verify task manager connection is closed
                        disconnectedFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                    });
        }
    };
}
Aggregations