Search in sources :

Example 6 with TaskRelocationPlan

use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.

the class TaskMigrationDeschedulerTest method testSelfMigration.

/**
 * Places the first task of "job1" on "removable1", gives "job1" an eviction quota of 1,
 * and registers a relocation plan for that task whose relocation time is
 * {@code Long.MAX_VALUE / 2}. The descheduler built from that single plan is expected
 * to report no best match.
 */
@Test
public void testSelfMigration() {
    Task task = jobOperations.getTasks("job1").get(0);
    relocationConnectorStubs.place("removable1", task);
    relocationConnectorStubs.setQuota("job1", 1);

    // Relocation time far in the future.
    TaskRelocationPlan farFuturePlan = TaskRelocationPlan.newBuilder()
            .withTaskId(task.getId())
            .withRelocationTime(Long.MAX_VALUE / 2)
            .build();

    Optional<Pair<TitusNode, List<Task>>> match =
            newDescheduler(Collections.singletonMap(task.getId(), farFuturePlan)).nextBestMatch();

    assertThat(match).isEmpty();
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) Pair(com.netflix.titus.common.util.tuple.Pair) Test(org.junit.Test)

Example 7 with TaskRelocationPlan

use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.

the class DefaultDeschedulerService method deschedule.

/**
 * Builds the list of descheduling results for all currently known jobs and tasks.
 *
 * <p>Steps, in order:
 * <ol>
 *     <li>Collect the results of {@code findAllImmediateEvictions()} and
 *     {@code findRequestedJobOrTaskMigrations()} and merge them.</li>
 *     <li>Repeatedly ask the descheduler for {@code nextBestMatch()}; every returned task that is
 *     not already in the merged map gets a result with the plan resolved by
 *     {@code getRelocationPlanForTask}.</li>
 *     <li>For every remaining task that the allocation tracker reports as evacuated, emit a result
 *     carrying the descheduling failure and either the planned-ahead plan or a freshly created
 *     not-delayed plan.</li>
 * </ol>
 *
 * @param plannedAheadTaskRelocationPlans relocation plans keyed by task id
 * @return requested evictions followed by regular evictions, merged into one list
 */
@Override
public List<DeschedulingResult> deschedule(Map<String, TaskRelocationPlan> plannedAheadTaskRelocationPlans) {
    List<Pair<Job, List<Task>>> jobsAndTasks = jobOperations.getJobsAndTasks();
    Map<String, Job<?>> jobIndex = jobsAndTasks.stream()
            .map(Pair::getLeft)
            .collect(Collectors.toMap(Job::getId, j -> j));
    Map<String, Task> taskIndex = jobsAndTasks.stream()
            .flatMap(p -> p.getRight().stream())
            .collect(Collectors.toMap(Task::getId, t -> t));

    EvacuatedAgentsAllocationTracker allocationTracker =
            new EvacuatedAgentsAllocationTracker(nodeDataResolver.resolve(), taskIndex);
    EvictionQuotaTracker quotaTracker = new EvictionQuotaTracker(evictionOperations, jobIndex);
    TaskMigrationDescheduler descheduler = new TaskMigrationDescheduler(
            plannedAheadTaskRelocationPlans, allocationTracker, quotaTracker,
            evictionConfiguration, jobIndex, taskIndex, titusRuntime);

    // Explicitly requested evictions: immediate ones first, then job/task migrations.
    Map<String, DeschedulingResult> immediate = descheduler.findAllImmediateEvictions();
    Map<String, DeschedulingResult> requestedMigrations = descheduler.findRequestedJobOrTaskMigrations();
    Map<String, DeschedulingResult> requested = CollectionsExt.merge(immediate, requestedMigrations);

    // Drain the descheduler's best matches until it has nothing more to offer.
    Map<String, DeschedulingResult> regular = new HashMap<>();
    for (Optional<Pair<TitusNode, List<Task>>> match = descheduler.nextBestMatch();
         match.isPresent();
         match = descheduler.nextBestMatch()) {
        Pair<TitusNode, List<Task>> agentAndTasks = match.get();
        TitusNode agent = agentAndTasks.getLeft();
        for (Task task : agentAndTasks.getRight()) {
            if (requested.containsKey(task.getId())) {
                continue;
            }
            getRelocationPlanForTask(agent, task, plannedAheadTaskRelocationPlans).ifPresent(plan ->
                    regular.put(task.getId(), DeschedulingResult.newBuilder()
                            .withTask(task)
                            .withAgentInstance(agent)
                            .withTaskRelocationPlan(plan)
                            .build()));
        }
    }

    // Find evictions which could not be scheduled now.
    for (Task task : taskIndex.values()) {
        boolean alreadyCovered = requested.containsKey(task.getId()) || regular.containsKey(task.getId());
        if (alreadyCovered || !allocationTracker.isEvacuated(task)) {
            continue;
        }
        DeschedulingFailure failure = descheduler.getDeschedulingFailure(task);
        TaskRelocationPlan plannedAhead = plannedAheadTaskRelocationPlans.get(task.getId());
        TaskRelocationPlan plan = plannedAhead != null ? plannedAhead : newNotDelayedRelocationPlan(task, false);
        TitusNode agent = allocationTracker.getRemovableAgent(task);
        regular.put(task.getId(), DeschedulingResult.newBuilder()
                .withTask(task)
                .withAgentInstance(agent)
                .withTaskRelocationPlan(plan)
                .withFailure(failure)
                .build());
    }

    return CollectionsExt.merge(new ArrayList<>(requested.values()), new ArrayList<>(regular.values()));
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) HashMap(java.util.HashMap) RelocationPredicates(com.netflix.titus.supplementary.relocation.util.RelocationPredicates) Singleton(javax.inject.Singleton) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) EvictionConfiguration(com.netflix.titus.runtime.connector.eviction.EvictionConfiguration) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) TaskRelocationReason(com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Job(com.netflix.titus.api.jobmanager.model.job.Job) Collectors(java.util.stream.Collectors) List(java.util.List) ReadOnlyEvictionOperations(com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations) Optional(java.util.Optional) RelocationUtil(com.netflix.titus.supplementary.relocation.util.RelocationUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Clock(com.netflix.titus.common.util.time.Clock) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) JobFunctions.hasDisruptionBudget(com.netflix.titus.api.jobmanager.model.job.JobFunctions.hasDisruptionBudget) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) 
TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) Job(com.netflix.titus.api.jobmanager.model.job.Job) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 8 with TaskRelocationPlan

use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.

the class TaskMigrationDescheduler method canTerminate.

/**
 * Decides whether the given task may be terminated now.
 *
 * @param task the task to check
 * @return {@code false} when the task's job is not present in {@code jobsById}; {@code true} when
 * no planned-ahead relocation plan exists for the task; otherwise {@code true} only if the plan's
 * relocation time is not later than the current wall-clock time
 */
private boolean canTerminate(Task task) {
    if (jobsById.get(task.getJobId()) == null) {
        return false;
    }
    TaskRelocationPlan plan = plannedAheadTaskRelocationPlans.get(task.getId());
    // If no relocation plan is found, this means the disruption budget policy does not limit us here.
    return plan == null || plan.getRelocationTime() <= clock.wallTime();
}
Also used : TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan)

Example 9 with TaskRelocationPlan

use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.

the class DefaultDeschedulerService method getRelocationPlanForTask.

/**
 * Resolves the relocation plan to attach to a task that is about to be descheduled.
 *
 * <p>Resolution order:
 * <ol>
 *     <li>the planned-ahead plan registered under the task id, if any;</li>
 *     <li>otherwise, if the task's job is found, {@code checkIfNeedsRelocationPlan} yields a reason,
 *     and the job is self managed with a disruption budget, a self-managed plan built for the
 *     current wall-clock time;</li>
 *     <li>otherwise a plan from {@code newNotDelayedRelocationPlan(task, true)}.</li>
 * </ol>
 *
 * @param agent                           node the task is running on
 * @param task                            task to resolve the plan for
 * @param plannedAheadTaskRelocationPlans relocation plans keyed by task id
 * @return the resolved plan; with the current logic the returned {@code Optional} is always present
 */
@VisibleForTesting
Optional<TaskRelocationPlan> getRelocationPlanForTask(TitusNode agent, Task task, Map<String, TaskRelocationPlan> plannedAheadTaskRelocationPlans) {
    TaskRelocationPlan plannedAheadTaskRelocationPlan = plannedAheadTaskRelocationPlans.get(task.getId());
    if (plannedAheadTaskRelocationPlan != null) {
        return Optional.of(plannedAheadTaskRelocationPlan);
    }

    // Recheck if a self managed plan is needed.
    Optional<TaskRelocationPlan> selfManagedPlan = jobOperations.getJob(task.getJobId())
            .flatMap(job -> RelocationPredicates.checkIfNeedsRelocationPlan(job, task, agent)
                    .filter(reason -> RelocationPredicates.isSelfManaged(job) && hasDisruptionBudget(job))
                    .map(reason -> RelocationUtil.buildSelfManagedRelocationPlan(job, task, reason, clock.wallTime())));

    // Fall back to a not-delayed plan when no self managed plan applies.
    return Optional.of(selfManagedPlan.orElseGet(() -> newNotDelayedRelocationPlan(task, true)));
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) HashMap(java.util.HashMap) RelocationPredicates(com.netflix.titus.supplementary.relocation.util.RelocationPredicates) Singleton(javax.inject.Singleton) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) EvictionConfiguration(com.netflix.titus.runtime.connector.eviction.EvictionConfiguration) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) TaskRelocationReason(com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Job(com.netflix.titus.api.jobmanager.model.job.Job) Collectors(java.util.stream.Collectors) List(java.util.List) ReadOnlyEvictionOperations(com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations) Optional(java.util.Optional) RelocationUtil(com.netflix.titus.supplementary.relocation.util.RelocationUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Clock(com.netflix.titus.common.util.time.Clock) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) JobFunctions.hasDisruptionBudget(com.netflix.titus.api.jobmanager.model.job.JobFunctions.hasDisruptionBudget) Optional(java.util.Optional) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 10 with TaskRelocationPlan

use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.

the class NeedsMigrationQueryHandler method findMatchingTasks.

/**
 * Collects the gRPC representation of every task in the current job snapshot that matches the
 * query criteria.
 *
 * <p>A task that has a relocation plan is tested against the criteria returned by
 * {@code filterOutNeedsMigration(queryCriteria)} and, on a match, is returned with the plan
 * attached via {@code newTaskWithRelocationPlan}. A task without a plan is tested against the
 * unmodified criteria and returned as is.
 *
 * @param queryCriteria the task query criteria
 * @return matching tasks converted to their gRPC form
 */
private List<com.netflix.titus.grpc.protogen.Task> findMatchingTasks(JobQueryCriteria<TaskStatus.TaskState, JobDescriptor.JobSpecCase> queryCriteria) {
    JobSnapshot snapshot = jobDataReplicator.getCurrent();
    Map<String, Job<?>> jobsById = snapshot.getJobMap();
    Map<String, TaskRelocationPlan> plansByTaskId = relocationDataReplicator.getCurrent().getPlans();

    V3TaskQueryCriteriaEvaluator fullFilter = new V3TaskQueryCriteriaEvaluator(queryCriteria, titusRuntime);
    V3TaskQueryCriteriaEvaluator filterIgnoringNeedsMigration =
            new V3TaskQueryCriteriaEvaluator(filterOutNeedsMigration(queryCriteria), titusRuntime);

    List<com.netflix.titus.grpc.protogen.Task> result = new ArrayList<>();
    for (Map.Entry<String, Job<?>> jobEntry : jobsById.entrySet()) {
        Map<String, Task> jobTasks = snapshot.getTasks(jobEntry.getKey());
        if (CollectionsExt.isNullOrEmpty(jobTasks)) {
            continue;
        }
        Job<?> job = jobEntry.getValue();
        for (Map.Entry<String, Task> taskEntry : jobTasks.entrySet()) {
            Task task = taskEntry.getValue();
            TaskRelocationPlan plan = plansByTaskId.get(task.getId());
            boolean hasPlan = plan != null;

            // Tasks with a plan are checked against the criteria stripped of the needs-migration part.
            V3TaskQueryCriteriaEvaluator filter = hasPlan ? filterIgnoringNeedsMigration : fullFilter;
            Pair<Job<?>, Task> jobAndTask = Pair.of(job, task);
            if (!filter.test(jobAndTask)) {
                continue;
            }

            com.netflix.titus.grpc.protogen.Task grpcTask = GrpcJobManagementModelConverters.toGrpcTask(task, logStorageInfo);
            if (hasPlan) {
                result.add(newTaskWithRelocationPlan(grpcTask, plan));
            } else {
                result.add(grpcTask);
            }
        }
    }
    return result;
}
Also used : V3TaskQueryCriteriaEvaluator(com.netflix.titus.runtime.endpoint.v3.grpc.query.V3TaskQueryCriteriaEvaluator) Task(com.netflix.titus.api.jobmanager.model.job.Task) ArrayList(java.util.ArrayList) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) Job(com.netflix.titus.api.jobmanager.model.job.Job)

Aggregations

TaskRelocationPlan (com.netflix.titus.api.relocation.model.TaskRelocationPlan): 18; Task (com.netflix.titus.api.jobmanager.model.job.Task): 10; Test (org.junit.Test): 10; Optional (java.util.Optional): 5; Job (com.netflix.titus.api.jobmanager.model.job.Job): 4; TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime): 4; Pair (com.netflix.titus.common.util.tuple.Pair): 4; AbstractTaskRelocationTest (com.netflix.titus.supplementary.relocation.AbstractTaskRelocationTest): 4; TitusNode (com.netflix.titus.supplementary.relocation.connector.TitusNode): 4; DeschedulingResult (com.netflix.titus.supplementary.relocation.model.DeschedulingResult): 4; HashMap (java.util.HashMap): 4; ReadOnlyEvictionOperations (com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations): 3; BatchJobExt (com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt): 3; ReadOnlyJobOperations (com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations): 3; TaskRelocationReason (com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason): 3; TaskRelocationStatus (com.netflix.titus.api.relocation.model.TaskRelocationStatus): 3; Clock (com.netflix.titus.common.util.time.Clock): 3; List (java.util.List): 3; Map (java.util.Map): 3; Collectors (java.util.stream.Collectors): 3