Search in sources :

Example 31 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class TaskMigrationDescheduler method computeFitness.

/**
 * Picks the tasks on the given agent that may be terminated now and scores the agent.
 *
 * <p>When the system disruption window is open, the number of picked tasks is capped by the
 * system eviction quota. When it is closed, every task on the agent is inspected, but only
 * tasks of jobs exempt from the system disruption window are considered. In both cases a job
 * contributes at most as many tasks as its job eviction quota allows.
 *
 * @param agent the node whose tasks are evaluated
 * @return a pair of (fitness, chosen tasks), or {@code FITNESS_RESULT_NONE} when nothing can be
 *         chosen; fitness drops by {@code TASK_ON_AGENT_PENALTY} for each task left on the agent
 *         and never goes below 0.01
 */
private Pair<Double, List<Task>> computeFitness(TitusNode agent) {
    List<Task> tasksOnAgent = evacuatedAgentsAllocationTracker.getTasksOnAgent(agent.getId());
    if (tasksOnAgent.isEmpty()) {
        return FITNESS_RESULT_NONE;
    }

    boolean systemWindowOpen = evictionQuotaTracker.isSystemDisruptionWindowOpen();
    // With the window closed all tasks must be inspected, so the limit is simply the task count
    // (the system quota is consulted only while the window is open).
    long terminationLimit = systemWindowOpen
            ? Math.min(tasksOnAgent.size(), evictionQuotaTracker.getSystemEvictionQuota())
            : tasksOnAgent.size();
    if (terminationLimit <= 0) {
        return FITNESS_RESULT_NONE;
    }

    Map<String, List<Task>> selectedByJob = new HashMap<>();
    List<Task> selected = new ArrayList<>();
    for (Task candidate : tasksOnAgent) {
        if (!canTerminate(candidate)) {
            continue;
        }
        String jobId = candidate.getJobId();
        Job<?> job = jobsById.get(jobId);
        // While the window is closed only exempt jobs proceed to the job quota check.
        if (!systemWindowOpen && !isJobExemptFromSystemDisruptionWindow(job)) {
            continue;
        }
        // Job eviction quota minus what this pass has already taken from the same job.
        long jobQuota = evictionQuotaTracker.getJobEvictionQuota(jobId);
        long alreadySelected = selectedByJob.getOrDefault(jobId, Collections.emptyList()).size();
        if (jobQuota - alreadySelected <= 0) {
            continue;
        }
        selectedByJob.computeIfAbsent(jobId, id -> new ArrayList<>()).add(candidate);
        selected.add(candidate);
        if (selected.size() >= terminationLimit) {
            break;
        }
    }

    if (selected.isEmpty()) {
        return FITNESS_RESULT_NONE;
    }
    int remainingOnAgent = tasksOnAgent.size() - selected.size();
    double fitness = Math.max(FITNESS_PERFECT - remainingOnAgent * TASK_ON_AGENT_PENALTY, 0.01);
    return Pair.of(fitness, selected);
}
Also used : DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) Task(com.netflix.titus.api.jobmanager.model.job.Task) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelocationPredicates(com.netflix.titus.supplementary.relocation.util.RelocationPredicates) Function(java.util.function.Function) ArrayList(java.util.ArrayList) List(java.util.List) Matcher(java.util.regex.Matcher) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) Optional(java.util.Optional) EvictionConfiguration(com.netflix.titus.runtime.connector.eviction.EvictionConfiguration) Pattern(java.util.regex.Pattern) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) RegExpExt(com.netflix.titus.common.util.RegExpExt) Comparator(java.util.Comparator) Collections(java.util.Collections) Clock(com.netflix.titus.common.util.time.Clock) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 32 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class GrpcJobReplicatorEventStreamTest method testCacheTaskMove.

/**
 * Moves a service task from one job to another and checks the replicated events: the stream is
 * recorded until a task update flagged as moved from another job arrives, exactly three events
 * are expected, and each one must be either a job update for the source/target job or the moved
 * task update pointing at the target job.
 */
@Test
public void testCacheTaskMove() {
    Pair<Job, List<Task>> sourceJobAndTasks = jobServiceStub.createJobAndTasks(SERVICE_JOB);
    Job targetJob = jobServiceStub.createJob(SERVICE_JOB);
    Task movedTask = sourceJobAndTasks.getRight().get(0);
    String sourceJobId = sourceJobAndTasks.getLeft().getId();
    String targetJobId = targetJob.getId();
    List<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> recorded = new ArrayList<>();

    newConnectVerifier()
            // Initial snapshot: every task is still in the Accepted state.
            .assertNext(next -> assertThat(next.getSnapshot().getTaskMap().values())
                    .allSatisfy(t -> assertThat(t.getStatus().getState()).isEqualTo(TaskState.Accepted)))
            .then(() -> jobServiceStub.moveTaskToState(movedTask, TaskState.Started))
            // After the state change the task is Started and still listed under the source job.
            .assertNext(next -> {
                JobSnapshot snapshot = next.getSnapshot();
                Optional<Pair<Job<?>, Task>> found = snapshot.findTaskById(movedTask.getId());
                assertThat(found).isPresent();
                assertThat(found.get().getRight().getStatus().getState()).isEqualTo(TaskState.Started);
                assertThat(snapshot.getTasks(sourceJobId)).containsKey(movedTask.getId());
            })
            .then(() -> jobServiceStub.getJobOperations()
                    .moveServiceTask(sourceJobId, targetJobId, movedTask.getId(), CallMetadata.newBuilder().withCallerId("Test").withCallReason("testing").build())
                    .test()
                    .awaitTerminalEvent()
                    .assertNoErrors())
            .recordWith(() -> recorded)
            // Keep consuming until the "moved from another job" task update shows up.
            .thenConsumeWhile(next -> {
                JobManagerEvent<?> trigger = next.getTrigger();
                return !(trigger instanceof TaskUpdateEvent) || !((TaskUpdateEvent) trigger).isMovedFromAnotherJob();
            })
            .thenCancel()
            .verify();

    assertThat(recorded).hasSize(3);
    for (ReplicatorEvent<JobSnapshot, JobManagerEvent<?>> event : recorded) {
        JobManagerEvent<?> jobManagerEvent = event.getTrigger();
        if (jobManagerEvent instanceof JobUpdateEvent) {
            String eventJobId = ((JobUpdateEvent) jobManagerEvent).getCurrent().getId();
            assertThat(eventJobId).isIn(sourceJobId, targetJobId);
        } else if (jobManagerEvent instanceof TaskUpdateEvent) {
            TaskUpdateEvent taskUpdate = (TaskUpdateEvent) jobManagerEvent;
            assertThat(taskUpdate.isMovedFromAnotherJob()).isTrue();
            assertThat(taskUpdate.getCurrentJob().getId()).isEqualTo(targetJobId);
            assertThat(taskUpdate.getCurrent().getJobId()).isEqualTo(targetJobId);
            assertThat(taskUpdate.getCurrent().getTaskContext().get(TaskAttributes.TASK_ATTRIBUTES_MOVED_FROM_JOB)).isEqualTo(sourceJobId);
        } else {
            fail("Unexpected event type: %s", jobManagerEvent);
        }
    }
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) JobDescriptorGenerator(com.netflix.titus.testkit.model.job.JobDescriptorGenerator) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) StepVerifier(reactor.test.StepVerifier) Task(com.netflix.titus.api.jobmanager.model.job.Task) CallMetadataConstants(com.netflix.titus.api.model.callmetadata.CallMetadataConstants) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) TimeoutException(java.util.concurrent.TimeoutException) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) ArrayList(java.util.ArrayList) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) Pair(com.netflix.titus.common.util.tuple.Pair) CacheUpdater(com.netflix.titus.runtime.connector.jobmanager.replicator.GrpcJobReplicatorEventStream.CacheUpdater) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) Duration(java.time.Duration) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) Schedulers(reactor.core.scheduler.Schedulers) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) Before(org.junit.Before) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) Job(com.netflix.titus.api.jobmanager.model.job.Job) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactories(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactories) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) 
TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) Mockito(org.mockito.Mockito) List(java.util.List) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) Assertions.fail(org.assertj.core.api.Assertions.fail) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobComponentStub(com.netflix.titus.testkit.model.job.JobComponentStub) DataReplicatorMetrics(com.netflix.titus.runtime.connector.common.replicator.DataReplicatorMetrics) Optional(java.util.Optional) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Mockito.mock(org.mockito.Mockito.mock) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) Task(com.netflix.titus.api.jobmanager.model.job.Task) Optional(java.util.Optional) ArrayList(java.util.ArrayList) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) ArrayList(java.util.ArrayList) List(java.util.List) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) Test(org.junit.Test)

Example 33 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class ReservationUsageCalculator method buildUsage.

/**
 * Builds reservation usage keyed by capacity group name.
 *
 * <p>Each job is accounted to its own capacity group when that group matches the name of a
 * configured application SLA, and to {@code ApplicationSlaManagementService.DEFAULT_APPLICATION}
 * otherwise. Configured application SLAs that ended up with no accumulated usage are reported
 * with {@code ReservationUsage.none()}.
 *
 * @return a new map from capacity group name to its reservation usage
 */
public Map<String, ReservationUsage> buildUsage() {
    Map<String, ResourceAccumulator> accumulators = new HashMap<>();
    List<Pair<Job, List<Task>>> allJobsAndTasks = jobOperations.getJobsAndTasks();
    Set<String> knownGroups = capacityManagementService.getApplicationSLAs().stream()
            .map(ApplicationSLA::getAppName)
            .collect(Collectors.toSet());

    for (Pair<Job, List<Task>> entry : allJobsAndTasks) {
        String declaredGroup = entry.getLeft().getJobDescriptor().getCapacityGroup();
        String effectiveGroup;
        if (knownGroups.contains(declaredGroup)) {
            effectiveGroup = declaredGroup;
        } else {
            // Jobs pointing at a group without a configured SLA are counted under the default.
            effectiveGroup = ApplicationSlaManagementService.DEFAULT_APPLICATION;
        }
        ResourceAccumulator groupAccumulator = accumulators.computeIfAbsent(effectiveGroup, key -> new ResourceAccumulator());
        processJob(groupAccumulator, entry);
    }

    Map<String, ReservationUsage> usageByGroup = new HashMap<>();
    for (Map.Entry<String, ResourceAccumulator> accumulated : accumulators.entrySet()) {
        usageByGroup.put(accumulated.getKey(), accumulated.getValue().toReservationUsage());
    }
    // Make sure every configured capacity group appears in the result, even with no jobs in it.
    for (ApplicationSLA sla : capacityManagementService.getApplicationSLAs()) {
        String groupName = sla.getAppName();
        if (!usageByGroup.containsKey(groupName)) {
            usageByGroup.put(groupName, ReservationUsage.none());
        }
    }
    return usageByGroup;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) ReservationUsage(com.netflix.titus.api.endpoint.v2.rest.representation.ReservationUsage) List(java.util.List) Job(com.netflix.titus.api.jobmanager.model.job.Job) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 34 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class TaskTerminationExecutor method checkTaskIsRunningOrThrowAnException.

/**
 * Looks up the task with the given id and verifies it is in a state that allows termination.
 *
 * <p>Throws the exception built by {@code EvictionException.taskNotFound} when no such task
 * exists, by {@code EvictionException.taskNotScheduledYet} when the task is still in the
 * {@code Accepted} state, and by {@code EvictionException.taskAlreadyStopped} when the task
 * state is not before {@code KillInitiated}.
 *
 * @param taskId id of the task to check
 * @return the job/task pair found for {@code taskId}
 */
private Pair<Job<?>, Task> checkTaskIsRunningOrThrowAnException(String taskId) {
    Pair<Job<?>, Task> jobAndTask = jobOperations.findTaskById(taskId)
            .orElseThrow(() -> EvictionException.taskNotFound(taskId));

    Task task = jobAndTask.getRight();
    TaskState currentState = task.getStatus().getState();
    if (currentState == TaskState.Accepted) {
        throw EvictionException.taskNotScheduledYet(task);
    }
    boolean beforeKill = TaskState.isBefore(currentState, TaskState.KillInitiated);
    if (!beforeKill) {
        throw EvictionException.taskAlreadyStopped(task);
    }
    return jobAndTask;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 35 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class TaskMigrationDeschedulerTest method testSelfMigrationAfterDeadline.

/**
 * Places one task of "job1" on "removable1", gives the job a quota of 1, attaches a relocation
 * plan whose relocation time is just before the current wall time, and expects the descheduler
 * to return a match.
 */
@Test
public void testSelfMigrationAfterDeadline() {
    Task task = jobOperations.getTasks("job1").get(0);
    String taskId = task.getId();
    relocationConnectorStubs.place("removable1", task);
    relocationConnectorStubs.setQuota("job1", 1);

    // Relocation time already passed relative to the test clock.
    long relocationTime = clock.wallTime() - 1;
    TaskRelocationPlan overduePlan = TaskRelocationPlan.newBuilder()
            .withTaskId(taskId)
            .withRelocationTime(relocationTime)
            .build();

    Optional<Pair<TitusNode, List<Task>>> match = newDescheduler(Collections.singletonMap(taskId, overduePlan)).nextBestMatch();
    assertThat(match).isNotEmpty();
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) Pair(com.netflix.titus.common.util.tuple.Pair) Test(org.junit.Test)

Aggregations

Pair (com.netflix.titus.common.util.tuple.Pair)41 Task (com.netflix.titus.api.jobmanager.model.job.Task)22 List (java.util.List)21 ArrayList (java.util.ArrayList)18 Job (com.netflix.titus.api.jobmanager.model.job.Job)14 Map (java.util.Map)10 Collectors (java.util.stream.Collectors)10 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)8 Optional (java.util.Optional)8 Logger (org.slf4j.Logger)8 LoggerFactory (org.slf4j.LoggerFactory)8 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)7 HashMap (java.util.HashMap)7 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)6 Collections (java.util.Collections)6 Observable (rx.Observable)6 TaskRelocationPlan (com.netflix.titus.api.relocation.model.TaskRelocationPlan)5 Function (java.util.function.Function)5 PreparedStatement (com.datastax.driver.core.PreparedStatement)4 Session (com.datastax.driver.core.Session)4