Search in sources :

Example 1 with DeschedulingResult

use of com.netflix.titus.supplementary.relocation.model.DeschedulingResult in project titus-control-plane by Netflix.

the class TaskMigrationDeschedulerTest method testImmediateMigrations.

/**
 * Tags job "jobToMigrate" with the immediate-relocation attribute (value: current wall time + 1),
 * places its first task on "active1", and expects {@code findAllImmediateEvictions()} to report
 * exactly one entry keyed by that task's id.
 */
@Test
public void testImmediateMigrations() {
    String requiredBy = String.valueOf(clock.wallTime() + 1);
    relocationConnectorStubs.addJobAttribute("jobToMigrate", RelocationAttributes.RELOCATION_REQUIRED_BY_IMMEDIATELY, requiredBy);

    Task taskToMigrate = jobOperations.getTasks("jobToMigrate").get(0);
    relocationConnectorStubs.place("active1", taskToMigrate);

    Map<String, DeschedulingResult> evictions = newDescheduler(Collections.emptyMap()).findAllImmediateEvictions();

    assertThat(evictions).hasSize(1);
    assertThat(evictions).containsKey(taskToMigrate.getId());
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Test(org.junit.Test)

Example 2 with DeschedulingResult

use of com.netflix.titus.supplementary.relocation.model.DeschedulingResult in project titus-control-plane by Netflix.

the class TaskMigrationDeschedulerTest method testJobRequiredMigrationBy.

/**
 * Places the first task of "job1" on "active1", grants the job an eviction quota of 1, stamps the job
 * with a relocation-required-by attribute equal to the current wall time, advances the clock by one
 * second, and expects {@code findRequestedJobOrTaskMigrations()} to return a non-empty result.
 */
@Test
public void testJobRequiredMigrationBy() {
    Task firstTask = jobOperations.getTasks("job1").get(0);
    relocationConnectorStubs.place("active1", firstTask);
    relocationConnectorStubs.setQuota("job1", 1);

    String requiredBy = String.valueOf(clock.wallTime());
    relocationConnectorStubs.addJobAttribute("job1", RelocationAttributes.RELOCATION_REQUIRED_BY, requiredBy);

    // Move the clock past the timestamp recorded above before evaluating.
    clock.advanceTime(Duration.ofSeconds(1));

    Map<String, DeschedulingResult> migrations = newDescheduler(Collections.emptyMap()).findRequestedJobOrTaskMigrations();
    assertThat(migrations).isNotEmpty();
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Test(org.junit.Test)

Example 3 with DeschedulingResult

use of com.netflix.titus.supplementary.relocation.model.DeschedulingResult in project titus-control-plane by Netflix.

the class DefaultDeschedulerService method deschedule.

/**
 * Builds the list of descheduling results for the current set of jobs and tasks.
 *
 * <p>The result is assembled in three stages:
 * <ol>
 *   <li>evictions reported by {@code findAllImmediateEvictions()} and
 *       {@code findRequestedJobOrTaskMigrations()}, merged into one map;</li>
 *   <li>"regular" evictions: tasks returned by successive {@code nextBestMatch()} calls that are not
 *       already in the requested set and for which {@code getRelocationPlanForTask} yields a plan;</li>
 *   <li>tasks on evacuated agents that were not picked up by the stages above; these are recorded
 *       together with the failure returned by {@code getDeschedulingFailure}.</li>
 * </ol>
 *
 * @param plannedAheadTaskRelocationPlans relocation plans keyed by task id
 * @return requested evictions followed by regular evictions, as merged by {@code CollectionsExt.merge}
 */
@Override
public List<DeschedulingResult> deschedule(Map<String, TaskRelocationPlan> plannedAheadTaskRelocationPlans) {
    List<Pair<Job, List<Task>>> jobsWithTasks = jobOperations.getJobsAndTasks();
    Map<String, Job<?>> jobs = jobsWithTasks.stream()
            .map(Pair::getLeft)
            .collect(Collectors.toMap(Job::getId, job -> job));
    Map<String, Task> tasksById = jobsWithTasks.stream()
            .flatMap(jobAndTasks -> jobAndTasks.getRight().stream())
            .collect(Collectors.toMap(Task::getId, task -> task));

    EvacuatedAgentsAllocationTracker evacuatedAgentsAllocationTracker = new EvacuatedAgentsAllocationTracker(nodeDataResolver.resolve(), tasksById);
    EvictionQuotaTracker evictionQuotaTracker = new EvictionQuotaTracker(evictionOperations, jobs);
    TaskMigrationDescheduler taskMigrationDescheduler = new TaskMigrationDescheduler(
            plannedAheadTaskRelocationPlans,
            evacuatedAgentsAllocationTracker,
            evictionQuotaTracker,
            evictionConfiguration,
            jobs,
            tasksById,
            titusRuntime
    );

    // Stage 1: explicitly requested evictions. Immediate evictions are resolved before the
    // job/task level requests, and this call order is kept as-is.
    Map<String, DeschedulingResult> requestedImmediateEvictions = taskMigrationDescheduler.findAllImmediateEvictions();
    Map<String, DeschedulingResult> requestedEvictions = taskMigrationDescheduler.findRequestedJobOrTaskMigrations();
    Map<String, DeschedulingResult> allRequestedEvictions = CollectionsExt.merge(requestedImmediateEvictions, requestedEvictions);

    // Stage 2: keep asking the descheduler for the next best match until it has none left.
    Map<String, DeschedulingResult> regularEvictions = new HashMap<>();
    Optional<Pair<TitusNode, List<Task>>> bestMatch = taskMigrationDescheduler.nextBestMatch();
    while (bestMatch.isPresent()) {
        TitusNode agent = bestMatch.get().getLeft();
        for (Task task : bestMatch.get().getRight()) {
            if (allRequestedEvictions.containsKey(task.getId())) {
                // Already covered by a requested eviction.
                continue;
            }
            Optional<TaskRelocationPlan> relocationPlanForTask = getRelocationPlanForTask(agent, task, plannedAheadTaskRelocationPlans);
            if (relocationPlanForTask.isPresent()) {
                DeschedulingResult eviction = DeschedulingResult.newBuilder()
                        .withTask(task)
                        .withAgentInstance(agent)
                        .withTaskRelocationPlan(relocationPlanForTask.get())
                        .build();
                regularEvictions.put(task.getId(), eviction);
            }
        }
        bestMatch = taskMigrationDescheduler.nextBestMatch();
    }

    // Stage 3: tasks on evacuated agents that could not be scheduled for eviction now are
    // reported together with the reason for the failure.
    for (Task task : tasksById.values()) {
        boolean alreadyHandled = allRequestedEvictions.containsKey(task.getId()) || regularEvictions.containsKey(task.getId());
        if (!alreadyHandled && evacuatedAgentsAllocationTracker.isEvacuated(task)) {
            DeschedulingFailure failure = taskMigrationDescheduler.getDeschedulingFailure(task);
            TaskRelocationPlan plannedAhead = plannedAheadTaskRelocationPlans.get(task.getId());
            // Fall back to a freshly created plan when nothing was planned ahead for this task.
            TaskRelocationPlan relocationPlan = plannedAhead != null ? plannedAhead : newNotDelayedRelocationPlan(task, false);
            TitusNode agent = evacuatedAgentsAllocationTracker.getRemovableAgent(task);
            DeschedulingResult failedEviction = DeschedulingResult.newBuilder()
                    .withTask(task)
                    .withAgentInstance(agent)
                    .withTaskRelocationPlan(relocationPlan)
                    .withFailure(failure)
                    .build();
            regularEvictions.put(task.getId(), failedEviction);
        }
    }

    return CollectionsExt.merge(new ArrayList<>(allRequestedEvictions.values()), new ArrayList<>(regularEvictions.values()));
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) HashMap(java.util.HashMap) RelocationPredicates(com.netflix.titus.supplementary.relocation.util.RelocationPredicates) Singleton(javax.inject.Singleton) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Pair(com.netflix.titus.common.util.tuple.Pair) Map(java.util.Map) EvictionConfiguration(com.netflix.titus.runtime.connector.eviction.EvictionConfiguration) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) TaskRelocationReason(com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) Job(com.netflix.titus.api.jobmanager.model.job.Job) Collectors(java.util.stream.Collectors) List(java.util.List) ReadOnlyEvictionOperations(com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations) Optional(java.util.Optional) RelocationUtil(com.netflix.titus.supplementary.relocation.util.RelocationUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Clock(com.netflix.titus.common.util.time.Clock) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) JobFunctions.hasDisruptionBudget(com.netflix.titus.api.jobmanager.model.job.JobFunctions.hasDisruptionBudget) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) 
TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) DeschedulingFailure(com.netflix.titus.supplementary.relocation.model.DeschedulingFailure) Job(com.netflix.titus.api.jobmanager.model.job.Job) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 4 with DeschedulingResult

use of com.netflix.titus.supplementary.relocation.model.DeschedulingResult in project titus-control-plane by Netflix.

the class TaskMigrationDescheduler method findRequestedJobOrTaskMigrations.

/**
 * Collects descheduling results for tasks whose relocation was explicitly requested.
 *
 * <p>A task is included when all of the following hold: its job and its agent are both known,
 * {@code RelocationPredicates.checkIfRelocationRequired} returns a reason, system eviction quota is
 * available for the job, the task can be terminated, and the job's eviction quota is positive.
 * Each included task consumes one unit of quota from the eviction quota tracker.
 *
 * @return descheduling results keyed by task id; empty when no task qualifies
 */
Map<String, DeschedulingResult> findRequestedJobOrTaskMigrations() {
    long now = clock.wallTime();
    Map<String, DeschedulingResult> migrations = new HashMap<>();

    for (Task task : tasksById.values()) {
        Job<?> job = jobsById.get(task.getJobId());
        TitusNode node = evacuatedAgentsAllocationTracker.getAgent(task);
        if (job == null || node == null) {
            // The task cannot be evaluated without both its job and its agent.
            continue;
        }

        RelocationPredicates.checkIfRelocationRequired(job, task).ifPresent(reason -> {
            // Short-circuit order matters: the job quota is only read once the system quota
            // and the termination check have both passed.
            boolean evictable = isSystemEvictionQuotaAvailable(job)
                    && canTerminate(task)
                    && evictionQuotaTracker.getJobEvictionQuota(task.getJobId()) > 0;
            if (evictable) {
                evictionQuotaTracker.consumeQuota(task.getJobId(), isJobExemptFromSystemDisruptionWindow(job));
                migrations.put(task.getId(), newDeschedulingResultForRequestedRelocation(now, task, node, reason.getRight()));
            }
        });
    }

    return migrations;
}
Also used : HashMap(java.util.HashMap) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode)

Example 5 with DeschedulingResult

use of com.netflix.titus.supplementary.relocation.model.DeschedulingResult in project titus-control-plane by Netflix.

the class DeschedulerStep method deschedule.

/**
 * Accepts the tasks that must be relocated and whose relocation was planned ahead of time, and runs the
 * descheduling step for them. Planning ahead is not always possible or desirable: for example, agent
 * defragmentation must be done quickly, otherwise its outcome may rapidly become obsolete.
 *
 * <p>The elapsed time is reported to the step metrics on both success and failure. Any exception thrown
 * by the step is logged and then rethrown to the caller.
 *
 * @param tasksToEvict relocation plans, keyed by task id, for tasks whose relocation was planned ahead
 * @return the tasks to terminate now. This may include tasks from {@code tasksToEvict} whose deadline has
 * passed, as well as tasks that were not planned ahead of time for relocation.
 */
public Map<String, DeschedulingResult> deschedule(Map<String, TaskRelocationPlan> tasksToEvict) {
    Stopwatch timer = Stopwatch.createStarted();
    try {
        Map<String, DeschedulingResult> outcome = execute(tasksToEvict);

        int descheduledCount = outcome.size();
        long elapsedMs = timer.elapsed(TimeUnit.MILLISECONDS);
        metrics.onSuccess(descheduledCount, elapsedMs);

        logger.debug("Descheduling result: {}", outcome);
        return outcome;
    } catch (Exception e) {
        logger.error("Step processing error", e);
        long elapsedMs = timer.elapsed(TimeUnit.MILLISECONDS);
        metrics.onError(elapsedMs);
        throw e;
    }
}
Also used : Stopwatch(com.google.common.base.Stopwatch) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult)

Aggregations

DeschedulingResult (com.netflix.titus.supplementary.relocation.model.DeschedulingResult): 9; Task (com.netflix.titus.api.jobmanager.model.job.Task): 6; Test (org.junit.Test): 5; TitusNode (com.netflix.titus.supplementary.relocation.connector.TitusNode): 3; HashMap (java.util.HashMap): 3; TaskRelocationPlan (com.netflix.titus.api.relocation.model.TaskRelocationPlan): 2; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1; Stopwatch (com.google.common.base.Stopwatch): 1; ReadOnlyEvictionOperations (com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations): 1; Job (com.netflix.titus.api.jobmanager.model.job.Job): 1; JobFunctions.hasDisruptionBudget (com.netflix.titus.api.jobmanager.model.job.JobFunctions.hasDisruptionBudget): 1; ServiceJobTask (com.netflix.titus.api.jobmanager.model.job.ServiceJobTask): 1; ReadOnlyJobOperations (com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations): 1; TaskRelocationReason (com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason): 1; TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime): 1; CollectionsExt (com.netflix.titus.common.util.CollectionsExt): 1; Clock (com.netflix.titus.common.util.time.Clock): 1; Pair (com.netflix.titus.common.util.tuple.Pair): 1; EvictionConfiguration (com.netflix.titus.runtime.connector.eviction.EvictionConfiguration): 1; NodeDataResolver (com.netflix.titus.supplementary.relocation.connector.NodeDataResolver): 1