Search in sources :

Example 1 with ResourceConsumption

use of com.netflix.titus.master.service.management.ResourceConsumption in project titus-control-plane by Netflix.

the class ResourceConsumptionEvaluatorTest method batchJobWithMultipleTasks.

/**
 * Evaluates resource consumption for three batch jobs, each of size 2:
 * one assigned to a capacity group with a defined SLA, one with an empty application name
 * assigned to the default capacity group, and one assigned to a capacity group for which
 * no SLA is defined. The assertions expect the last two jobs to be accounted together
 * under the default capacity group, and a configured-but-unused capacity group to report
 * empty current consumption.
 */
@SuppressWarnings("unchecked")
@Test
public void batchJobWithMultipleTasks() {
    when(applicationSlaManagementService.getApplicationSLAs()).thenReturn(asList(
            ConsumptionModelGenerator.DEFAULT_SLA,
            ConsumptionModelGenerator.CRITICAL_SLA_1,
            ConsumptionModelGenerator.NOT_USED_SLA
    ));

    String criticalGroup = ConsumptionModelGenerator.CRITICAL_SLA_1.getAppName();
    String defaultGroup = ConsumptionModelGenerator.DEFAULT_SLA.getAppName();
    String notUsedGroup = ConsumptionModelGenerator.NOT_USED_SLA.getAppName();

    // Job assigned to a capacity group that has an SLA
    Job<BatchJobExt> goodCapacityJob = newBatchJob(
            "goodCapacityJob",
            jd -> jd.toBuilder()
                    .withExtensions(jd.getExtensions().toBuilder().withSize(2).build())
                    .withCapacityGroup(criticalGroup)
                    .build()
    ).getLeft();
    List<Task> goodCapacityTasks = jobComponentStub.getJobOperations().getTasks(goodCapacityJob.getId());

    // Job with an empty application name
    Job<BatchJobExt> noAppNameJob = newBatchJob(
            "badCapacityJob",
            jd -> jd.toBuilder()
                    .withApplicationName("")
                    .withExtensions(jd.getExtensions().toBuilder().withSize(2).build())
                    .withCapacityGroup(defaultGroup)
                    .build()
    ).getLeft();
    List<Task> noAppNameTasks = jobComponentStub.getJobOperations().getTasks(noAppNameJob.getId());

    // Job assigned to a capacity group that has no SLA
    Job<BatchJobExt> badCapacityJob = newBatchJob(
            "badCapacityJob",
            jd -> jd.toBuilder()
                    .withExtensions(jd.getExtensions().toBuilder().withSize(2).build())
                    .withCapacityGroup("missingCapacityGroup")
                    .build()
    ).getLeft();
    List<Task> badCapacityTasks = jobComponentStub.getJobOperations().getTasks(badCapacityJob.getId());

    // Run the evaluation
    ResourceConsumptionEvaluator evaluator = new ResourceConsumptionEvaluator(applicationSlaManagementService, v3JobOperations);

    Set<String> undefined = evaluator.getUndefinedCapacityGroups();
    assertThat(undefined).contains("missingCapacityGroup");

    CompositeResourceConsumption systemConsumption = evaluator.getSystemConsumption();
    Map<String, ResourceConsumption> tierConsumptions = systemConsumption.getContributors();
    assertThat(tierConsumptions).containsKeys(Tier.Critical.name(), Tier.Flex.name());

    // Critical tier: capacity group used by goodCapacityJob
    CompositeResourceConsumption criticalConsumption = (CompositeResourceConsumption) findConsumption(
            systemConsumption, Tier.Critical.name(), criticalGroup
    ).get();
    assertThat(criticalConsumption.getCurrentConsumption())
            .isEqualTo(expectedCurrentConsumptionForBatchJob(goodCapacityJob, goodCapacityTasks));
    assertThat(criticalConsumption.getMaxConsumption())
            .isEqualTo(expectedMaxConsumptionForBatchJob(goodCapacityJob));
    assertThat(criticalConsumption.getAllowedConsumption())
            .isEqualTo(ConsumptionModelGenerator.capacityGroupLimit(ConsumptionModelGenerator.CRITICAL_SLA_1));
    assertThat(criticalConsumption.isAboveLimit()).isTrue();

    // Flex tier: default capacity group, expected to hold both remaining jobs
    CompositeResourceConsumption defaultConsumption = (CompositeResourceConsumption) findConsumption(
            systemConsumption, Tier.Flex.name(), defaultGroup
    ).get();
    assertThat(defaultConsumption.getCurrentConsumption()).isEqualTo(ResourceDimensions.add(
            expectedCurrentConsumptionForBatchJob(noAppNameJob, noAppNameTasks),
            expectedCurrentConsumptionForBatchJob(badCapacityJob, badCapacityTasks)
    ));
    assertThat(defaultConsumption.getMaxConsumption()).isEqualTo(ResourceDimensions.add(
            expectedMaxConsumptionForBatchJob(noAppNameJob),
            expectedMaxConsumptionForBatchJob(badCapacityJob)
    ));
    assertThat(defaultConsumption.getAllowedConsumption())
            .isEqualTo(ConsumptionModelGenerator.capacityGroupLimit(ConsumptionModelGenerator.DEFAULT_SLA));
    assertThat(defaultConsumption.isAboveLimit()).isFalse();

    // Critical tier: capacity group that no job uses
    CompositeResourceConsumption notUsedConsumption = (CompositeResourceConsumption) findConsumption(
            systemConsumption, Tier.Critical.name(), notUsedGroup
    ).get();
    assertThat(notUsedConsumption.getCurrentConsumption()).isEqualTo(ResourceDimension.empty());
    assertThat(notUsedConsumption.getAllowedConsumption())
            .isEqualTo(ConsumptionModelGenerator.capacityGroupLimit(ConsumptionModelGenerator.NOT_USED_SLA));
    assertThat(notUsedConsumption.isAboveLimit()).isFalse();
}
Also used : JobDescriptorGenerator(com.netflix.titus.testkit.model.job.JobDescriptorGenerator) ApplicationSlaManagementService(com.netflix.titus.master.service.management.ApplicationSlaManagementService) Task(com.netflix.titus.api.jobmanager.model.job.Task) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) ResourceDimensions(com.netflix.titus.master.model.ResourceDimensions) Function(java.util.function.Function) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) Pair(com.netflix.titus.common.util.tuple.Pair) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ResourceDimension(com.netflix.titus.api.model.ResourceDimension) ResourceConsumptionEvaluator.perTaskResourceDimension(com.netflix.titus.master.service.management.internal.ResourceConsumptionEvaluator.perTaskResourceDimension) Before(org.junit.Before) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) Job(com.netflix.titus.api.jobmanager.model.job.Job) Set(java.util.Set) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Tier(com.netflix.titus.api.model.Tier) ResourceConsumptions.findConsumption(com.netflix.titus.master.service.management.ResourceConsumptions.findConsumption) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) ContainerResources(com.netflix.titus.api.jobmanager.model.job.ContainerResources) JobComponentStub(com.netflix.titus.testkit.model.job.JobComponentStub) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) Mockito.mock(org.mockito.Mockito.mock) Task(com.netflix.titus.api.jobmanager.model.job.Task) 
CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) Test(org.junit.Test)

Example 2 with ResourceConsumption

use of com.netflix.titus.master.service.management.ResourceConsumption in project titus-control-plane by Netflix.

the class DefaultResourceConsumptionService method notifyAboutResourceConsumptionChange.

/**
 * Publishes a {@link CapacityGroupAllocationEvent} for every capacity-group consumption in
 * {@code latestEvaluation} that has no counterpart in {@code oldEvaluation}, or whose
 * counterpart is not equal to it.
 *
 * @param oldEvaluation the previous evaluation result; when {@code null}, every capacity
 *                      group in the latest evaluation is treated as changed
 */
private void notifyAboutResourceConsumptionChange(ConsumptionEvaluationResult oldEvaluation) {
    Map<String, ResourceConsumption> currentByGroup = ResourceConsumptions.groupBy(
            latestEvaluation.getSystemConsumption(), ConsumptionLevel.CapacityGroup
    );

    Map<String, ResourceConsumption> previousByGroup;
    if (oldEvaluation == null) {
        previousByGroup = Collections.emptyMap();
    } else {
        previousByGroup = ResourceConsumptions.groupBy(
                oldEvaluation.getSystemConsumption(), ConsumptionLevel.CapacityGroup
        );
    }

    // One timestamp is shared by all events emitted in this pass.
    long timestamp = worker.now();

    for (ResourceConsumption current : currentByGroup.values()) {
        String consumerName = current.getConsumerName();
        ResourceConsumption before = previousByGroup.get(consumerName);
        if (before != null && before.equals(current)) {
            // Unchanged since the previous evaluation; nothing to publish.
            continue;
        }
        publishEvent(new CapacityGroupAllocationEvent(consumerName, timestamp, (CompositeResourceConsumption) current));
    }
}
Also used : CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) CapacityGroupAllocationEvent(com.netflix.titus.master.service.management.ResourceConsumptionEvents.CapacityGroupAllocationEvent)

Example 3 with ResourceConsumption

use of com.netflix.titus.master.service.management.ResourceConsumption in project titus-control-plane by Netflix.

the class ResourceConsumptionEvaluator method computeAllocationsByCapacityGroupAndAppName.

/**
 * Walks all jobs with their tasks and accumulates one consumption record per job, keyed first
 * by capacity group and then by application name. Each job-level record carries a breakdown
 * by instance type. Capacity groups from {@code definedCapacityGroups} that end up with no
 * jobs are added with an empty application map.
 *
 * @return a pair whose left side is capacityGroups -> apps -> instanceTypes -> consumption,
 *         and whose right side is the set handed to {@code resolveCapacityGroup} for
 *         collecting undefined capacity groups
 */
private Pair<Map<String, Map<String, ResourceConsumption>>, Set<String>> computeAllocationsByCapacityGroupAndAppName() {
    Map<String, Map<String, ResourceConsumption>> byCapacityGroup = new HashMap<>();
    Set<String> undefinedGroups = new HashSet<>();

    v3JobOperations.getJobsAndTasks().forEach(jobAndTasks -> {
        Job job = jobAndTasks.getLeft();
        List<Task> allTasks = jobAndTasks.getRight();
        List<Task> activeTasks = getRunningWorkers(allTasks);

        ResourceDimension perTask = perTaskResourceDimension(job);
        String appName = Evaluators.getOrDefault(job.getJobDescriptor().getApplicationName(), DEFAULT_APPLICATION);

        ResourceDimension jobCurrent = ResourceDimensions.multiply(perTask, activeTasks.size());
        ResourceDimension jobMax = ResourceDimensions.multiply(perTask, getMaxJobSize(job));

        // Tasks without an instance type attribute are bucketed under "unknown".
        Map<String, List<Task>> tasksPerInstanceType = allTasks.stream().collect(
                groupingBy(t -> t.getTaskContext().getOrDefault(TaskAttributes.TASK_ATTRIBUTES_AGENT_ITYPE, "unknown"))
        );

        Map<String, ResourceConsumption> perInstanceType = CollectionsExt.mapValuesWithKeys(
                tasksPerInstanceType,
                (instanceType, tasksOfType) -> {
                    ResourceDimension typeConsumption = ResourceDimensions.multiply(perTask, getRunningWorkers(tasksOfType).size());
                    // maxConsumption is not relevant at ConsumptionLevel.InstanceType,
                    // so the current consumption is passed for both values.
                    return new ResourceConsumption(
                            instanceType,
                            ConsumptionLevel.InstanceType,
                            typeConsumption,
                            typeConsumption,
                            getWorkerStateMap(tasksOfType)
                    );
                },
                HashMap::new
        );

        // allowedConsumption is not relevant at ConsumptionLevel.Application, so jobMax is
        // passed for it as well. A job is always considered within its allowed usage, since
        // it can't go over its max; hence the final 'false'.
        ResourceConsumption jobConsumption = new CompositeResourceConsumption(
                appName,
                ConsumptionLevel.Application,
                jobCurrent,
                jobMax,
                jobMax,
                getWorkerStateMap(allTasks),
                perInstanceType,
                false
        );

        String capacityGroup = resolveCapacityGroup(undefinedGroups, job, appName);
        updateConsumptionMap(appName, capacityGroup, jobConsumption, byCapacityGroup);
    });

    // Add capacity groups that are defined but have no jobs
    copyAndRemove(definedCapacityGroups, byCapacityGroup.keySet())
            .forEach(unusedGroup -> byCapacityGroup.put(unusedGroup, Collections.emptyMap()));

    return Pair.of(byCapacityGroup, undefinedGroups);
}
Also used : DEFAULT_APPLICATION(com.netflix.titus.master.service.management.ApplicationSlaManagementService.DEFAULT_APPLICATION) ApplicationSlaManagementService(com.netflix.titus.master.service.management.ApplicationSlaManagementService) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) HashMap(java.util.HashMap) ResourceDimensions(com.netflix.titus.master.model.ResourceDimensions) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CollectionsExt.copyAndRemove(com.netflix.titus.common.util.CollectionsExt.copyAndRemove) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ApplicationSLA(com.netflix.titus.api.model.ApplicationSLA) ConsumptionLevel(com.netflix.titus.master.service.management.ResourceConsumption.ConsumptionLevel) ResourceDimension(com.netflix.titus.api.model.ResourceDimension) SYSTEM_CONSUMER(com.netflix.titus.master.service.management.ResourceConsumption.SYSTEM_CONSUMER) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) Collection(java.util.Collection) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Tier(com.netflix.titus.api.model.Tier) Collectors(java.util.stream.Collectors) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) ContainerResources(com.netflix.titus.api.jobmanager.model.job.ContainerResources) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) Evaluators(com.netflix.titus.common.util.Evaluators) Collections(java.util.Collections) ResourceConsumptions(com.netflix.titus.master.service.management.ResourceConsumptions) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) ArrayList(java.util.ArrayList) List(java.util.List) Job(com.netflix.titus.api.jobmanager.model.job.Job) HashMap(java.util.HashMap) Map(java.util.Map) ResourceDimension(com.netflix.titus.api.model.ResourceDimension) HashSet(java.util.HashSet)

Example 4 with ResourceConsumption

use of com.netflix.titus.master.service.management.ResourceConsumption in project titus-control-plane by Netflix.

the class ResourceConsumptionEvaluator method updateConsumptionMap.

/**
 * Records a job's consumption under its capacity group and application name. If the
 * application already has an entry in that capacity group, the stored value is replaced by
 * {@code ResourceConsumptions.add(existing, jobConsumption)}; otherwise the job consumption
 * is stored as is.
 *
 * @param applicationName application name; {@code null} is replaced by {@code DEFAULT_APPLICATION}
 * @param capacityGroup   capacity group key; its inner map is created on first use
 * @param jobConsumption  consumption of a single job
 * @param consumptionMap  capacityGroup -> appName -> consumption map, updated in place
 */
private void updateConsumptionMap(String applicationName, String capacityGroup, ResourceConsumption jobConsumption, Map<String, Map<String, ResourceConsumption>> consumptionMap) {
    String appKey;
    if (applicationName != null) {
        appKey = applicationName;
    } else {
        appKey = DEFAULT_APPLICATION;
    }

    Map<String, ResourceConsumption> byApplication = consumptionMap.computeIfAbsent(capacityGroup, group -> new HashMap<>());
    ResourceConsumption existing = byApplication.get(appKey);
    ResourceConsumption updated = existing == null
            ? jobConsumption
            : ResourceConsumptions.add(existing, jobConsumption);
    byApplication.put(appKey, updated);
}
Also used : ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption)

Example 5 with ResourceConsumption

use of com.netflix.titus.master.service.management.ResourceConsumption in project titus-control-plane by Netflix.

the class ResourceConsumptionEvaluator method buildSystemConsumption.

/**
 * Builds the consumption hierarchy bottom-up: a composite per capacity group, capacity groups
 * aggregated per tier, and tiers aggregated into a single system-level composite.
 *
 * @param capacityGroupConsumptionMap capacityGroup -> appName -> consumption; every key is
 *                                    looked up in {@code applicationSlaMap} and the result is
 *                                    dereferenced without a null check
 * @return the aggregate named {@code SYSTEM_CONSUMER} at {@code ConsumptionLevel.System}
 */
private CompositeResourceConsumption buildSystemConsumption(Map<String, Map<String, ResourceConsumption>> capacityGroupConsumptionMap) {
    // Capacity group level: one composite per group, bucketed by the tier of the group's SLA
    Map<Tier, List<CompositeResourceConsumption>> groupsByTier = new HashMap<>();
    for (Map.Entry<String, Map<String, ResourceConsumption>> groupEntry : capacityGroupConsumptionMap.entrySet()) {
        String capacityGroup = groupEntry.getKey();
        Map<String, ResourceConsumption> appConsumptions = groupEntry.getValue();

        ApplicationSLA sla = applicationSlaMap.get(capacityGroup);
        // The group's allowance is the SLA's per-instance dimension times its instance count.
        ResourceDimension allowed = ResourceDimensions.multiply(sla.getResourceDimension(), sla.getInstanceCount());
        ResourceDimension max = ResourceConsumptions.addMaxConsumptions(appConsumptions.values());

        List<Map<String, Object>> appAttributes = new ArrayList<>();
        for (ResourceConsumption appConsumption : appConsumptions.values()) {
            appAttributes.add(appConsumption.getAttributes());
        }

        ResourceDimension current = ResourceConsumptions.addCurrentConsumptions(appConsumptions.values());
        CompositeResourceConsumption groupConsumption = new CompositeResourceConsumption(
                capacityGroup,
                ConsumptionLevel.CapacityGroup,
                current,
                max,
                allowed,
                ResourceConsumptions.mergeAttributes(appAttributes),
                appConsumptions,
                // Last argument: true unless ResourceDimensions.isBigger(allowed, max) holds.
                !ResourceDimensions.isBigger(allowed, max)
        );

        List<CompositeResourceConsumption> tierBucket = groupsByTier.computeIfAbsent(sla.getTier(), t -> new ArrayList<>());
        tierBucket.add(groupConsumption);
    }

    // Tier level
    List<CompositeResourceConsumption> tierAggregates = new ArrayList<>();
    for (Map.Entry<Tier, List<CompositeResourceConsumption>> tierEntry : groupsByTier.entrySet()) {
        tierAggregates.add(ResourceConsumptions.aggregate(tierEntry.getKey().name(), ConsumptionLevel.Tier, tierEntry.getValue()));
    }

    // System level
    return ResourceConsumptions.aggregate(SYSTEM_CONSUMER, ConsumptionLevel.System, tierAggregates);
}
Also used : DEFAULT_APPLICATION(com.netflix.titus.master.service.management.ApplicationSlaManagementService.DEFAULT_APPLICATION) ApplicationSlaManagementService(com.netflix.titus.master.service.management.ApplicationSlaManagementService) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) HashMap(java.util.HashMap) ResourceDimensions(com.netflix.titus.master.model.ResourceDimensions) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CollectionsExt.copyAndRemove(com.netflix.titus.common.util.CollectionsExt.copyAndRemove) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ResourceConsumption(com.netflix.titus.master.service.management.ResourceConsumption) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ApplicationSLA(com.netflix.titus.api.model.ApplicationSLA) ConsumptionLevel(com.netflix.titus.master.service.management.ResourceConsumption.ConsumptionLevel) ResourceDimension(com.netflix.titus.api.model.ResourceDimension) SYSTEM_CONSUMER(com.netflix.titus.master.service.management.ResourceConsumption.SYSTEM_CONSUMER) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) Collection(java.util.Collection) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Tier(com.netflix.titus.api.model.Tier) Collectors(java.util.stream.Collectors) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) ContainerResources(com.netflix.titus.api.jobmanager.model.job.ContainerResources) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) Evaluators(com.netflix.titus.common.util.Evaluators) Collections(java.util.Collections) ResourceConsumptions(com.netflix.titus.master.service.management.ResourceConsumptions) CompositeResourceConsumption(com.netflix.titus.master.service.management.CompositeResourceConsumption) Tier(com.netflix.titus.api.model.Tier) HashMap(java.util.HashMap) ApplicationSLA(com.netflix.titus.api.model.ApplicationSLA) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ResourceDimension(com.netflix.titus.api.model.ResourceDimension)

Aggregations

CompositeResourceConsumption (com.netflix.titus.master.service.management.CompositeResourceConsumption)8 ResourceConsumption (com.netflix.titus.master.service.management.ResourceConsumption)8 ResourceDimension (com.netflix.titus.api.model.ResourceDimension)5 List (java.util.List)5 Job (com.netflix.titus.api.jobmanager.model.job.Job)4 Tier (com.netflix.titus.api.model.Tier)4 ResourceDimensions (com.netflix.titus.master.model.ResourceDimensions)4 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4 Set (java.util.Set)4 ContainerResources (com.netflix.titus.api.jobmanager.model.job.ContainerResources)3 Task (com.netflix.titus.api.jobmanager.model.job.Task)3 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)3 BatchJobExt (com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt)3 V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations)3 ApplicationSLA (com.netflix.titus.api.model.ApplicationSLA)3 CollectionsExt (com.netflix.titus.common.util.CollectionsExt)3 Pair (com.netflix.titus.common.util.tuple.Pair)3 ApplicationSlaManagementService (com.netflix.titus.master.service.management.ApplicationSlaManagementService)3 ConsumptionLevel (com.netflix.titus.master.service.management.ResourceConsumption.ConsumptionLevel)3