Search in sources :

Example 11 with ResourceRequirements

use of org.apache.flink.runtime.slots.ResourceRequirements in project flink by apache.

the class DeclarativeSlotManagerTest method testSpreadOutSlotAllocationStrategy.

/**
 * Checks that, with the least-utilization slot matching strategy configured, allocated slots are
 * spread out across all available TaskExecutors. See FLINK-12122.
 */
@Test
public void testSpreadOutSlotAllocationStrategy() throws Exception {
    final int numberTaskExecutors = 5;
    final List<CompletableFuture<JobID>> slotRequestsPerExecutor = new ArrayList<>();
    try (DeclarativeSlotManager slotManager =
            createDeclarativeSlotManagerBuilder()
                    .setSlotMatchingStrategy(LeastUtilizationSlotMatchingStrategy.INSTANCE)
                    .buildAndStartWithDirectExec()) {
        // register n TaskExecutors with 2 slots each; every executor gets its own future
        for (int registered = 0; registered < numberTaskExecutors; registered++) {
            final CompletableFuture<JobID> slotRequested = new CompletableFuture<>();
            slotRequestsPerExecutor.add(slotRequested);
            registerTaskExecutorWithTwoSlots(slotManager, slotRequested);
        }

        // declare the requirements of a single job, sized by the number of TaskExecutors
        final JobID jobId = new JobID();
        slotManager.processResourceRequirements(createResourceRequirements(jobId, numberTaskExecutors));

        // every TaskExecutor must have received a slot request, and all of them for this job
        final Set<JobID> requestingJobs =
                new HashSet<>(FutureUtils.combineAll(slotRequestsPerExecutor).get(10L, TimeUnit.SECONDS));
        assertThat(requestingJobs, hasSize(1));
        assertThat(requestingJobs, containsInAnyOrder(jobId));
    }
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with ResourceRequirements

use of org.apache.flink.runtime.slots.ResourceRequirements in project flink by apache.

the class FineGrainedSlotManagerTest method testRequirementCheckOnlyTriggeredOnce.

/**
 * Tests that several events arriving in quick succession (two calls to
 * {@code processResourceRequirements} plus a task manager registration) result in only a single
 * invocation of the try-fulfill-requirements function, and that a later
 * {@code processResourceRequirements} call triggers a second invocation.
 *
 * <p>The test is skipped via {@code assumeTrue} when the initial events take longer than the
 * configured requirement check delay, since the single-invocation expectation would not be
 * meaningful in that case.
 */
@Test
public void testRequirementCheckOnlyTriggeredOnce() throws Exception {
    new Context() {

        {
            // Index 0 completes on the first requirement check, index 1 on a later one.
            final List<CompletableFuture<Void>> checkRequirementFutures = new ArrayList<>();
            checkRequirementFutures.add(new CompletableFuture<>());
            checkRequirementFutures.add(new CompletableFuture<>());
            // Compared against elapsed milliseconds and passed to Thread.sleep below, i.e. the
            // test treats this value as milliseconds.
            final long requirementCheckDelay = 50;
            // Record each invocation: the first one completes future 0, any invocation after
            // that completes future 1.
            resourceAllocationStrategyBuilder.setTryFulfillRequirementsFunction((ignored1, ignored2) -> {
                if (checkRequirementFutures.get(0).isDone()) {
                    checkRequirementFutures.get(1).complete(null);
                } else {
                    checkRequirementFutures.get(0).complete(null);
                }
                return ResourceAllocationResult.builder().build();
            });
            setRequirementCheckDelay(requirementCheckDelay);
            runTest(() -> {
                final ResourceRequirements resourceRequirements1 = createResourceRequirementsForSingleSlot();
                final ResourceRequirements resourceRequirements2 = createResourceRequirementsForSingleSlot();
                final ResourceRequirements resourceRequirements3 = createResourceRequirementsForSingleSlot();
                final TaskExecutorConnection taskExecutionConnection = createTaskExecutorConnection();
                final CompletableFuture<Void> registrationFuture = new CompletableFuture<>();
                final long start = System.nanoTime();
                // Issue three events back to back in the main thread.
                runInMainThread(() -> {
                    getSlotManager().processResourceRequirements(resourceRequirements1);
                    getSlotManager().processResourceRequirements(resourceRequirements2);
                    getSlotManager().registerTaskManager(taskExecutionConnection, new SlotReport(), DEFAULT_TOTAL_RESOURCE_PROFILE, DEFAULT_SLOT_RESOURCE_PROFILE);
                    registrationFuture.complete(null);
                });
                assertFutureCompleteAndReturn(registrationFuture);
                // Elapsed time in milliseconds (nanoseconds / 1_000_000).
                final long registrationTime = (System.nanoTime() - start) / 1_000_000;
                assumeTrue("The time of process requirement and register task manager must not take longer than the requirement check delay. If it does, then this indicates a very slow machine.", registrationTime < requirementCheckDelay);
                // Exactly one check so far: future 0 is done, future 1 is not.
                assertFutureCompleteAndReturn(checkRequirementFutures.get(0));
                assertFutureNotComplete(checkRequirementFutures.get(1));
                // Without further events no additional check may happen, even after waiting
                // twice the check delay.
                Thread.sleep(requirementCheckDelay * 2);
                assertFutureNotComplete(checkRequirementFutures.get(1));
                // Another processResourceRequirements call must trigger a second check.
                runInMainThread(() -> getSlotManager().processResourceRequirements(resourceRequirements3));
                assertFutureCompleteAndReturn(checkRequirementFutures.get(1));
            });
        }
    };
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) ArrayList(java.util.ArrayList) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Test(org.junit.Test)

Example 13 with ResourceRequirements

use of org.apache.flink.runtime.slots.ResourceRequirements in project flink by apache.

the class FineGrainedSlotManagerTest method testNotificationAboutNotEnoughResources.

/**
 * Verifies that the not-enough-resources consumer is invoked exactly once, and for the expected
 * job, when the allocation strategy reports that job as unfulfillable.
 *
 * @param withNotificationGracePeriod if {@code true}, {@code setFailUnfulfillableRequest(false)}
 *     is called first and the test asserts that no notification is delivered until it is set
 *     back to {@code true}
 */
private void testNotificationAboutNotEnoughResources(boolean withNotificationGracePeriod) throws Exception {
    final JobID jobId = new JobID();
    final CompletableFuture<Void> notificationReceived = new CompletableFuture<>();
    final List<Tuple2<JobID, Collection<ResourceRequirement>>> notifications = new ArrayList<>();
    new Context() {

        {
            // collect every notification and signal that at least one has arrived
            resourceActionsBuilder.setNotEnoughResourcesConsumer((reportedJobId, reportedResources) -> {
                notifications.add(Tuple2.of(reportedJobId, reportedResources));
                notificationReceived.complete(null);
            });
            // the strategy always declares the job as unfulfillable
            resourceAllocationStrategyBuilder.setTryFulfillRequirementsFunction(
                    (ignoredRequirements, ignoredProvider) ->
                            ResourceAllocationResult.builder().addUnfulfillableJob(jobId).build());
            runTest(() -> {
                if (withNotificationGracePeriod) {
                    // this should disable notifications
                    runInMainThread(() -> getSlotManager().setFailUnfulfillableRequest(false));
                }

                final ResourceRequirements requirements = createResourceRequirements(jobId, 1);
                runInMainThread(() -> getSlotManager().processResourceRequirements(requirements));

                if (withNotificationGracePeriod) {
                    assertFutureNotComplete(notificationReceived);
                    assertThat(notifications, empty());
                    // re-enable notifications which should also trigger another
                    // resource check
                    runInMainThread(() -> getSlotManager().setFailUnfulfillableRequest(true));
                }

                assertFutureCompleteAndReturn(notificationReceived);
                assertThat(notifications, hasSize(1));
                assertThat(notifications.get(0).f0, is(jobId));
            });
        }
    };
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) Matchers.not(org.hamcrest.Matchers.not) Tuple6(org.apache.flink.api.java.tuple.Tuple6) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) CompletableFuture(java.util.concurrent.CompletableFuture) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) BigDecimal(java.math.BigDecimal) TestingMetricRegistry(org.apache.flink.runtime.metrics.util.TestingMetricRegistry) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Matchers.empty(org.hamcrest.Matchers.empty) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) InstanceID(org.apache.flink.runtime.instance.InstanceID) SlotManagerMetricGroup(org.apache.flink.runtime.metrics.groups.SlotManagerMetricGroup) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Consumer(java.util.function.Consumer) List(java.util.List) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) 
Optional(java.util.Optional) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) Assume.assumeTrue(org.junit.Assume.assumeTrue) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) Assert.assertEquals(org.junit.Assert.assertEquals) ArrayList(java.util.ArrayList) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) CompletableFuture(java.util.concurrent.CompletableFuture) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) JobID(org.apache.flink.api.common.JobID)

Example 14 with ResourceRequirements

use of org.apache.flink.runtime.slots.ResourceRequirements in project flink by apache.

the class DeclarativeSlotManager method checkResourceRequirements.

// ---------------------------------------------------------------------------------------------
// Requirement matching
// ---------------------------------------------------------------------------------------------
/**
 * Matches resource requirements against available resources. In a first round, requirements
 * are matched against free slots, and any match results in a slot allocation. The remaining
 * unfulfilled requirements are then matched against pending slots, allocating more workers if
 * no matching pending slot could be found. If the requirements for a job could not be
 * fulfilled, a notification is sent to the job master informing it as such.
 *
 * <p>Performance notes: At its core this method loops, for each job, over all free/pending
 * slots for each required slot, trying to find a matching slot. One should generally assume
 * that this runs in numberOfJobsRequiringResources * numberOfRequiredSlots *
 * numberOfFreeOrPendingSlots. This is especially important when dealing with pending slots, as
 * matches between requirements and pending slots are not persisted and are recomputed on each
 * call. This may require further refinements in the future; e.g., persisting the matches
 * between requirements and pending slots, or not matching against pending slots at all.
 *
 * <p>When dealing with unspecific resource profiles (i.e., {@link ResourceProfile#ANY}/{@link
 * ResourceProfile#UNKNOWN}), the number of free/pending slots is not relevant because exactly
 * 1 comparison is enough to determine whether a slot can be fulfilled or not, since they are
 * all the same anyway.
 *
 * <p>When dealing with specific resource profiles things can be a lot worse, with the
 * classical cases where either no matches are found, or only at the very end of the
 * iteration. In the absolute worst case, with J jobs, each requiring R slots with unique
 * resource profiles such that no pair of these profiles matches, and S free/pending slots that
 * don't fulfill any requirement, this method does a total of J*R*S resource profile
 * comparisons.
 */
private void checkResourceRequirements() {
    final Map<JobID, Collection<ResourceRequirement>> missingResources = resourceTracker.getMissingResources();
    if (missingResources.isEmpty()) {
        return;
    }

    // Round 1: try to allocate slots per job and remember whatever could not be satisfied.
    // A LinkedHashMap keeps the jobs in the order in which they were visited.
    final Map<JobID, ResourceCounter> unfulfilledRequirements = new LinkedHashMap<>();
    missingResources.forEach((jobId, missingForJob) -> {
        final ResourceCounter stillMissing = tryAllocateSlotsForJob(jobId, missingForJob);
        if (!stillMissing.isEmpty()) {
            unfulfilledRequirements.put(jobId, stillMissing);
        }
    });
    if (unfulfilledRequirements.isEmpty()) {
        return;
    }

    // Round 2: count the pending slots per resource profile and match the leftovers against
    // them; each call returns the pending slots that remain for the next job.
    final Map<ResourceProfile, Integer> pendingSlotCounts =
            taskExecutorManager.getPendingTaskManagerSlots().stream()
                    .collect(Collectors.groupingBy(PendingTaskManagerSlot::getResourceProfile, Collectors.summingInt(slot -> 1)));
    ResourceCounter remainingPendingSlots = ResourceCounter.withResources(pendingSlotCounts);
    for (Map.Entry<JobID, ResourceCounter> leftover : unfulfilledRequirements.entrySet()) {
        remainingPendingSlots = tryFulfillRequirementsWithPendingSlots(
                leftover.getKey(), leftover.getValue().getResourcesWithCount(), remainingPendingSlots);
    }
}
Also used : WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) ResourceCounter(org.apache.flink.runtime.util.ResourceCounter) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) LinkedHashMap(java.util.LinkedHashMap) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Map(java.util.Map) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) SlotInfo(org.apache.flink.runtime.rest.messages.taskmanager.SlotInfo) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) SlotOccupiedException(org.apache.flink.runtime.taskexecutor.exceptions.SlotOccupiedException) Nullable(javax.annotation.Nullable) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Logger(org.slf4j.Logger) Executor(java.util.concurrent.Executor) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) Set(java.util.Set) InstanceID(org.apache.flink.runtime.instance.InstanceID) SlotManagerMetricGroup(org.apache.flink.runtime.metrics.groups.SlotManagerMetricGroup) Preconditions(org.apache.flink.util.Preconditions) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) MetricNames(org.apache.flink.runtime.metrics.MetricNames) JobID(org.apache.flink.api.common.JobID) TaskExecutorConnection(org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection) SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) Optional(java.util.Optional) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) Collections(java.util.Collections) Time(org.apache.flink.api.common.time.Time) 
AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) Collection(java.util.Collection) ResourceCounter(org.apache.flink.runtime.util.ResourceCounter) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) JobID(org.apache.flink.api.common.JobID) LinkedHashMap(java.util.LinkedHashMap)

Example 15 with ResourceRequirements

use of org.apache.flink.runtime.slots.ResourceRequirements in project flink by apache.

the class DefaultDeclareResourceRequirementServiceConnectionManagerTest method runStopSendingResourceRequirementsTest.

/**
 * Shared driver for tests expecting that resource requirements stop being sent after some
 * action on the connection manager.
 *
 * <p>Connects a {@code FailingDeclareResourceRequirementsService}, declares resource
 * requirements, waits until the service has seen the declaration, applies {@code testAction},
 * runs the scheduled non-periodic tasks, and finally asserts that the service holds no
 * resource requirements.
 *
 * @param testAction action applied to the connection manager after the first declaration
 * @throws InterruptedException if interrupted while waiting for the declaration
 */
private void runStopSendingResourceRequirementsTest(Consumer<DeclareResourceRequirementServiceConnectionManager> testAction) throws InterruptedException {
    final DeclareResourceRequirementServiceConnectionManager connectionManager = createResourceManagerConnectionManager();
    final FailingDeclareResourceRequirementsService failingService = new FailingDeclareResourceRequirementsService(1);

    connectionManager.connect(failingService);
    connectionManager.declareResourceRequirements(createResourceRequirements());
    failingService.waitForResourceRequirementsDeclaration();

    testAction.accept(connectionManager);
    scheduledExecutor.triggerNonPeriodicScheduledTasksWithRecursion();

    assertThat(failingService.hasResourceRequirements(), is(false));
}
Also used : ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements)

Aggregations

ResourceRequirements (org.apache.flink.runtime.slots.ResourceRequirements)25 Test (org.junit.Test)22 CompletableFuture (java.util.concurrent.CompletableFuture)20 TaskExecutorConnection (org.apache.flink.runtime.resourcemanager.registration.TaskExecutorConnection)16 SlotReport (org.apache.flink.runtime.taskexecutor.SlotReport)16 JobID (org.apache.flink.api.common.JobID)14 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)14 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)14 TestingTaskExecutorGatewayBuilder (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder)13 ArrayList (java.util.ArrayList)12 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)12 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)12 TestingTaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway)12 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)11 ResourceManagerId (org.apache.flink.runtime.resourcemanager.ResourceManagerId)11 ResourceRequirement (org.apache.flink.runtime.slots.ResourceRequirement)11 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)10 TaskExecutorGateway (org.apache.flink.runtime.taskexecutor.TaskExecutorGateway)10 List (java.util.List)9 Tuple6 (org.apache.flink.api.java.tuple.Tuple6)9