Search in sources :

Example 11 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class JobReconciliationFrameworkFactory method loadJobsAndTasksFromStore.

/**
 * Initializes the store and loads every job that is not in the {@code Finished} state,
 * together with its tasks.
 *
 * <p>Corrupted job records, jobs rejected by {@code validateJob} and corrupted task records
 * are reported to {@code errorCollector}. The {@code loadedJobs}, {@code loadedTasks} and
 * {@code storeLoadTimeMs} gauges are updated as a side effect.
 *
 * @param errorCollector receives the corrupted/invalid record reports found while loading
 * @return one pair per loaded job, holding the job and the list of its tasks
 * @throws IllegalStateException if anything fails while reading from the store
 */
private List<Pair<Job, List<Task>>> loadJobsAndTasksFromStore(InitializationErrorCollector errorCollector) {
    long loadStart = clock.wallTime();
    // Each element: job -> (tasks of that job, number reported alongside them by the store)
    List<Pair<Job, Pair<List<Task>, Integer>>> loaded;
    try {
        loaded = store.init().andThen(store.retrieveJobs().flatMap(jobsAndErrors -> {
            errorCollector.corruptedJobRecords(jobsAndErrors.getRight());
            List<Job<?>> storedJobs = jobsAndErrors.getLeft();
            List<Observable<Pair<Job, Pair<List<Task>, Integer>>>> taskLoaders = new ArrayList<>();
            for (Job storedJob : storedJobs) {
                // TODO Finished jobs that were not archived immediately should be archived by background archive process
                if (storedJob.getStatus().getState() == JobState.Finished) {
                    logger.info("Not loading finished job: {}", storedJob.getId());
                    continue;
                }
                Optional<Job> validated = validateJob(storedJob);
                if (!validated.isPresent()) {
                    errorCollector.invalidJob(storedJob.getId());
                    continue;
                }
                // Pair the validated job with whatever the store returns for its tasks.
                Observable<Pair<Job, Pair<List<Task>, Integer>>> taskLoader = store.retrieveTasksForJob(storedJob.getId()).map(tasksAndErrors -> new Pair<>(validated.get(), tasksAndErrors));
                taskLoaders.add(taskLoader);
            }
            return Observable.merge(taskLoaders, MAX_RETRIEVE_TASK_CONCURRENCY);
        })).toList().toBlocking().singleOrDefault(Collections.emptyList());

        int corruptedTasks = 0;
        for (Pair<Job, Pair<List<Task>, Integer>> entry : loaded) {
            corruptedTasks += entry.getRight().getRight();
        }
        errorCollector.corruptedTaskRecords(corruptedTasks);

        int totalTasks = loaded.stream().mapToInt(entry -> entry.getRight().getLeft().size()).sum();
        loadedJobs.set(loaded.size());
        loadedTasks.set(totalTasks);

        for (Pair<Job, Pair<List<Task>, Integer>> entry : loaded) {
            List<String> taskSummaries = entry.getRight().getLeft().stream()
                    .map(task -> String.format("<%s,ks:%s>", task.getId(), task.getStatus().getState()))
                    .collect(Collectors.toList());
            logger.info("Loaded job: {} with tasks: {}", entry.getLeft().getId(), taskSummaries);
        }
        logger.info("{} jobs and {} tasks loaded from store in {}ms", loaded.size(), totalTasks, clock.wallTime() - loadStart);
    } catch (Exception e) {
        logger.error("Failed to load jobs from the store during initialization:", e);
        throw new IllegalStateException("Failed to load jobs from the store during initialization", e);
    } finally {
        // Recorded on both the success and the failure path.
        storeLoadTimeMs.set(clock.wallTime() - loadStart);
    }
    // Drop the per-job count; callers only get (job, tasks).
    return loaded.stream().map(entry -> Pair.of(entry.getLeft(), entry.getRight().getLeft())).collect(Collectors.toList());
}
Also used : IndexKind(com.netflix.titus.master.jobmanager.service.DefaultV3JobOperations.IndexKind) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) InternalReconciliationEngine(com.netflix.titus.common.framework.reconciler.internal.InternalReconciliationEngine) LoggerFactory(org.slf4j.LoggerFactory) DefaultReconciliationFramework(com.netflix.titus.common.framework.reconciler.internal.DefaultReconciliationFramework) ValidationError(com.netflix.titus.common.model.sanitizer.ValidationError) FeatureActivationConfiguration(com.netflix.titus.api.FeatureActivationConfiguration) JobEventFactory(com.netflix.titus.master.jobmanager.service.event.JobEventFactory) Map(java.util.Map) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) BasicTag(com.netflix.spectator.api.BasicTag) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) DifferenceResolver(com.netflix.titus.common.framework.reconciler.ReconciliationEngine.DifferenceResolver) Job(com.netflix.titus.api.jobmanager.model.job.Job) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Scheduler(rx.Scheduler) Collectors(java.util.stream.Collectors) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) List(java.util.List) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) Clock(com.netflix.titus.common.util.time.Clock) Gauge(com.netflix.spectator.api.Gauge) EntitySanitizer(com.netflix.titus.common.model.sanitizer.EntitySanitizer) ApplicationSlaManagementService(com.netflix.titus.master.service.management.ApplicationSlaManagementService) DefaultReconciliationEngine(com.netflix.titus.common.framework.reconciler.internal.DefaultReconciliationEngine) MetricConstants(com.netflix.titus.master.MetricConstants) Singleton(javax.inject.Singleton) 
TaskTimeoutChangeActions(com.netflix.titus.master.jobmanager.service.common.action.task.TaskTimeoutChangeActions) ArrayList(java.util.ArrayList) Observable(rx.Observable) Inject(javax.inject.Inject) Pair(com.netflix.titus.common.util.tuple.Pair) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) Named(javax.inject.Named) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) JOB_PERMISSIVE_SANITIZER(com.netflix.titus.api.jobmanager.model.job.sanitizer.JobSanitizerBuilder.JOB_PERMISSIVE_SANITIZER) Logger(org.slf4j.Logger) Tag(com.netflix.spectator.api.Tag) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) JOB_STRICT_SANITIZER(com.netflix.titus.api.jobmanager.model.job.sanitizer.JobSanitizerBuilder.JOB_STRICT_SANITIZER) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) Version(com.netflix.titus.api.jobmanager.model.job.Version) Registry(com.netflix.spectator.api.Registry) ReconciliationFramework(com.netflix.titus.common.framework.reconciler.ReconciliationFramework) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Comparator(java.util.Comparator) EntitySanitizerUtil(com.netflix.titus.common.model.sanitizer.EntitySanitizerUtil) Collections(java.util.Collections) DifferenceResolvers(com.netflix.titus.common.framework.reconciler.DifferenceResolvers) Task(com.netflix.titus.api.jobmanager.model.job.Task) Optional(java.util.Optional) Observable(rx.Observable) List(java.util.List) ArrayList(java.util.ArrayList) Job(com.netflix.titus.api.jobmanager.model.job.Job) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 12 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class ObserveJobsSubscription method createJobsSnapshot.

/**
 * Builds a list of change notifications for the jobs and tasks currently returned by
 * {@code context.getJobOperations().getJobsAndTasks()}.
 *
 * <p>For each job, a job notification is added first (when {@code jobsPredicate} accepts the
 * job/tasks pair), followed by one notification per task accepted by {@code tasksPredicate}.
 * All notifications share a single timestamp read once at the start.
 *
 * @param jobsPredicate  filter applied to each (job, tasks) pair
 * @param tasksPredicate filter applied to each (job, task) pair
 * @return the notifications, in job-then-its-tasks order
 */
private List<JobChangeNotification> createJobsSnapshot(Predicate<Pair<Job<?>, List<Task>>> jobsPredicate, Predicate<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, com.netflix.titus.api.jobmanager.model.job.Task>> tasksPredicate) {
    long snapshotTime = titusRuntime.getClock().wallTime();
    List<JobChangeNotification> notifications = new ArrayList<>();

    // Go through a raw List to work around the generics mismatch on getJobsAndTasks().
    List rawJobsAndTasks = context.getJobOperations().getJobsAndTasks();
    List<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>>> jobsAndTasks = rawJobsAndTasks;

    for (Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>> jobAndTasks : jobsAndTasks) {
        com.netflix.titus.api.jobmanager.model.job.Job<?> currentJob = jobAndTasks.getLeft();
        List<com.netflix.titus.api.jobmanager.model.job.Task> jobTasks = jobAndTasks.getRight();

        if (jobsPredicate.test(jobAndTasks)) {
            notifications.add(context.toJobChangeNotification(currentJob, snapshotTime));
        }
        // Tasks are filtered independently of whether the job itself was included.
        for (com.netflix.titus.api.jobmanager.model.job.Task currentTask : jobTasks) {
            if (tasksPredicate.test(Pair.of(currentJob, currentTask))) {
                notifications.add(context.toJobChangeNotification(currentTask, snapshotTime));
            }
        }
    }
    return notifications;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) ArrayList(java.util.ArrayList) JobChangeNotification(com.netflix.titus.grpc.protogen.JobChangeNotification) ArrayList(java.util.ArrayList) List(java.util.List) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 13 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class TaskRelocationLimitController method getTaskQuota.

/**
 * Computes the eviction quota for a single task reference.
 *
 * <p>The quota is 0 when the task cannot be found or when its eviction resubmit number has
 * reached {@code perTaskLimit}; otherwise it is 1.
 *
 * @param taskReference reference whose name is used as the task id
 * @return a quota of 0 or 1 with an explanatory message
 */
private EvictionQuota getTaskQuota(Reference taskReference) {
    String taskId = taskReference.getName();
    EvictionQuota.Builder quotaBuilder = EvictionQuota.newBuilder().withReference(taskReference);

    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(taskId);
    if (!lookup.isPresent()) {
        return quotaBuilder.withQuota(0).withMessage("Task not found").build();
    }

    int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
    if (resubmits >= perTaskLimit) {
        // Reuse the rejection reason carried by the shared "limit exceeded" result.
        return quotaBuilder.withQuota(0).withMessage(taskLimitExceeded.getRejectionReason().get()).build();
    }
    return quotaBuilder.withQuota(1).withMessage("Per task limit is %s, and restart count is %s", perTaskLimit, resubmits).build();
}
Also used : EvictionQuota(com.netflix.titus.api.eviction.model.EvictionQuota) Task(com.netflix.titus.api.jobmanager.model.job.Task) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 14 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class TaskRelocationLimitController method consume.

/**
 * Decides whether the given task may be relocated.
 *
 * @param taskId id of the task to look up via {@code jobOperations}
 * @return {@code TASK_NOT_FOUND} if the task is unknown, {@code taskLimitExceeded} if its
 *         eviction resubmit number is at or above {@code perTaskLimit}, otherwise an
 *         approved result
 */
@Override
public ConsumptionResult consume(String taskId) {
    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(taskId);
    if (lookup.isPresent()) {
        int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
        return resubmits < perTaskLimit ? ConsumptionResult.approved() : taskLimitExceeded;
    }
    return TASK_NOT_FOUND;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 15 with Pair

use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.

the class UnhealthyTasksLimitTracker method countHealthy.

/**
 * Counts the tasks of {@code job} that are in the {@code Started} state and whose container
 * health status is {@code Healthy}.
 *
 * <p>The returned message explains why the count may be insufficient:
 * <ul>
 *   <li>{@code "job not found"} when the task lookup throws {@link JobManagerException};</li>
 *   <li>a report of not-started/unhealthy tasks, listing at most
 *       {@code TASK_ID_REPORT_LIMIT} of them followed by a single
 *       {@code ",... dropped N tasks]"} marker for the rest;</li>
 *   <li>an empty string, or a "not enough healthy containers" message, when every task is
 *       started and healthy.</li>
 * </ul>
 *
 * @return pair of (healthy task count, explanatory message)
 */
private Pair<Integer, String> countHealthy() {
    List<Task> tasks;
    try {
        tasks = jobOperations.getTasks(job.getId());
    } catch (JobManagerException e) {
        return Pair.of(0, "job not found");
    }
    int healthy = 0;
    // task id -> short reason why the task does not count as healthy
    Map<String, String> notStartedOrUnhealthyTasks = new HashMap<>();
    for (Task task : tasks) {
        if (task.getStatus().getState() == TaskState.Started) {
            Optional<ContainerHealthStatus> statusOpt = containerHealthService.findHealthStatus(task.getId());
            if (statusOpt.isPresent() && statusOpt.get().getState() == ContainerHealthState.Healthy) {
                healthy++;
            } else {
                String report = statusOpt.map(status -> startWithLowercase(status.getState().name()) + '(' + status.getReason() + ')').orElse("health not found");
                notStartedOrUnhealthyTasks.put(task.getId(), report);
            }
        } else {
            notStartedOrUnhealthyTasks.put(task.getId(), String.format("Not started (current task state=%s)", task.getStatus().getState()));
        }
    }
    if (!notStartedOrUnhealthyTasks.isEmpty()) {
        int total = notStartedOrUnhealthyTasks.size();
        StringBuilder builder = new StringBuilder("not started and healthy: ");
        builder.append("total=").append(total);
        builder.append(", tasks=[");
        int counter = 0;
        for (Map.Entry<String, String> entry : notStartedOrUnhealthyTasks.entrySet()) {
            builder.append(entry.getKey()).append('=').append(entry.getValue());
            counter++;
            int remaining = total - counter;
            if (remaining == 0) {
                builder.append("]");
            } else if (counter >= TASK_ID_REPORT_LIMIT) {
                // Report limit reached: emit one truncation marker and stop. Without the
                // break the loop kept listing every task and repeated the marker.
                builder.append(",... dropped ").append(remaining).append(" tasks]");
                break;
            } else {
                builder.append(", ");
            }
        }
        return Pair.of(healthy, builder.toString());
    }
    return Pair.of(healthy, healthy > minimumHealthyCount ? "" : String.format("not enough healthy containers: healthy=%s, minimum=%s", healthy, minimumHealthyCount));
}
Also used : Job(com.netflix.titus.api.jobmanager.model.job.Job) Task(com.netflix.titus.api.jobmanager.model.job.Task) EvictionQuota(com.netflix.titus.api.eviction.model.EvictionQuota) HashMap(java.util.HashMap) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Reference(com.netflix.titus.api.model.reference.Reference) UnhealthyTasksLimitDisruptionBudgetPolicy(com.netflix.titus.api.jobmanager.model.job.disruptionbudget.UnhealthyTasksLimitDisruptionBudgetPolicy) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) ContainerHealthState(com.netflix.titus.api.containerhealth.model.ContainerHealthState) List(java.util.List) ContainerHealthStatus(com.netflix.titus.api.containerhealth.model.ContainerHealthStatus) AvailabilityPercentageLimitDisruptionBudgetPolicy(com.netflix.titus.api.jobmanager.model.job.disruptionbudget.AvailabilityPercentageLimitDisruptionBudgetPolicy) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) Pair(com.netflix.titus.common.util.tuple.Pair) QuotaTracker(com.netflix.titus.master.eviction.service.quota.QuotaTracker) Map(java.util.Map) Optional(java.util.Optional) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) StringExt.startWithLowercase(com.netflix.titus.common.util.StringExt.startWithLowercase) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ContainerHealthService(com.netflix.titus.api.containerhealth.service.ContainerHealthService) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) ContainerHealthStatus(com.netflix.titus.api.containerhealth.model.ContainerHealthStatus) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

Pair (com.netflix.titus.common.util.tuple.Pair)41 Task (com.netflix.titus.api.jobmanager.model.job.Task)22 List (java.util.List)21 ArrayList (java.util.ArrayList)18 Job (com.netflix.titus.api.jobmanager.model.job.Job)14 Map (java.util.Map)10 Collectors (java.util.stream.Collectors)10 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)8 Optional (java.util.Optional)8 Logger (org.slf4j.Logger)8 LoggerFactory (org.slf4j.LoggerFactory)8 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)7 HashMap (java.util.HashMap)7 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)6 Collections (java.util.Collections)6 Observable (rx.Observable)6 TaskRelocationPlan (com.netflix.titus.api.relocation.model.TaskRelocationPlan)5 Function (java.util.function.Function)5 PreparedStatement (com.datastax.driver.core.PreparedStatement)4 Session (com.datastax.driver.core.Session)4