Search in sources:

Example 31 with Task

use of com.netflix.titus.api.jobmanager.model.job.Task in project titus-control-plane by Netflix.

the class ObserveJobsSubscription method createJobsSnapshot.

/**
 * Builds a point-in-time list of change notifications for the jobs and tasks currently returned by
 * {@code context.getJobOperations().getJobsAndTasks()}.
 *
 * <p>For every (job, tasks) pair the job notification is added first (when {@code jobsPredicate}
 * accepts the pair), followed by a notification for each of its tasks accepted by
 * {@code tasksPredicate}. All notifications share the same wall-clock timestamp.
 *
 * @param jobsPredicate  filter applied to each (job, tasks) pair
 * @param tasksPredicate filter applied to each (job, task) pair
 * @return notifications in iteration order; empty if nothing matched
 */
private List<JobChangeNotification> createJobsSnapshot(Predicate<Pair<Job<?>, List<Task>>> jobsPredicate, Predicate<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, com.netflix.titus.api.jobmanager.model.job.Task>> tasksPredicate) {
    // One timestamp is taken up front and reused for every notification in this snapshot.
    long timestamp = titusRuntime.getClock().wallTime();
    List<JobChangeNotification> notifications = new ArrayList<>();

    // The result is routed through a raw List on purpose: it works around a generics mismatch
    // between the declared return type and the element type iterated below.
    List rawJobsAndTasks = context.getJobOperations().getJobsAndTasks();
    List<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>>> jobsAndTasks = rawJobsAndTasks;

    for (Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>> jobAndTasks : jobsAndTasks) {
        com.netflix.titus.api.jobmanager.model.job.Job<?> currentJob = jobAndTasks.getLeft();
        List<com.netflix.titus.api.jobmanager.model.job.Task> currentTasks = jobAndTasks.getRight();

        if (jobsPredicate.test(jobAndTasks)) {
            notifications.add(context.toJobChangeNotification(currentJob, timestamp));
        }

        // Tasks are filtered independently of whether their job was accepted above.
        for (com.netflix.titus.api.jobmanager.model.job.Task currentTask : currentTasks) {
            if (tasksPredicate.test(Pair.of(currentJob, currentTask))) {
                notifications.add(context.toJobChangeNotification(currentTask, timestamp));
            }
        }
    }
    return notifications;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) ArrayList(java.util.ArrayList) JobChangeNotification(com.netflix.titus.grpc.protogen.JobChangeNotification) ArrayList(java.util.ArrayList) List(java.util.List) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 32 with Task

use of com.netflix.titus.api.jobmanager.model.job.Task in project titus-control-plane by Netflix.

the class TaskRelocationLimitController method getTaskQuota.

/**
 * Computes the eviction quota for a single task identified by {@code taskReference}.
 *
 * <p>The quota is 1 when the task exists and its eviction resubmit number is below
 * {@code perTaskLimit}; otherwise it is 0, with a message explaining why (task not found, or the
 * rejection reason carried by {@code taskLimitExceeded}).
 *
 * @param taskReference reference whose name is used as the task id
 * @return quota of 0 or 1 for the referenced task
 */
private EvictionQuota getTaskQuota(Reference taskReference) {
    String id = taskReference.getName();
    EvictionQuota.Builder builder = EvictionQuota.newBuilder().withReference(taskReference);

    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(id);
    if (lookup.isPresent()) {
        int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
        if (resubmits >= perTaskLimit) {
            // Limit reached: reuse the rejection reason of the shared "limit exceeded" result.
            return builder.withQuota(0).withMessage(taskLimitExceeded.getRejectionReason().get()).build();
        }
        return builder.withQuota(1).withMessage("Per task limit is %s, and restart count is %s", perTaskLimit, resubmits).build();
    }
    return builder.withQuota(0).withMessage("Task not found").build();
}
Also used : EvictionQuota(com.netflix.titus.api.eviction.model.EvictionQuota) Task(com.netflix.titus.api.jobmanager.model.job.Task) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 33 with Task

use of com.netflix.titus.api.jobmanager.model.job.Task in project titus-control-plane by Netflix.

the class TaskRelocationLimitController method getJobQuota.

/**
 * Computes the job-level eviction quota: the number of tasks of {@code job} whose eviction
 * resubmit number is still below {@code perTaskLimit}.
 *
 * @param jobReference reference attached to the returned quota
 * @return the computed quota; a zero quota with an "Internal error" message if the task lookup
 *         throws {@code JobManagerException}
 */
private EvictionQuota getJobQuota(Reference jobReference) {
    EvictionQuota.Builder builder = EvictionQuota.newBuilder().withReference(jobReference);

    List<Task> jobTasks;
    try {
        jobTasks = jobOperations.getTasks(job.getId());
    } catch (JobManagerException e) {
        return builder.withQuota(0).withMessage("Internal error: %s", e.getMessage()).build();
    }

    // Count the tasks that have not yet used up their per-task eviction allowance.
    int evictable = 0;
    for (Task candidate : jobTasks) {
        if (task_isBelowLimit(candidate.getEvictionResubmitNumber())) {
            evictable++;
        }
    }

    if (evictable > 0) {
        return builder.withQuota(evictable).withMessage("Per task limit is %s", perTaskLimit).build();
    }
    return builder.withQuota(0).withMessage("Each task of the job reached its maximum eviction limit %s", perTaskLimit).build();
}

/** Returns true when the given resubmit count is strictly below {@code perTaskLimit}. */
private boolean task_isBelowLimit(int resubmitNumber) {
    return resubmitNumber < perTaskLimit;
}
Also used : EvictionQuota(com.netflix.titus.api.eviction.model.EvictionQuota) Task(com.netflix.titus.api.jobmanager.model.job.Task) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException)

Example 34 with Task

use of com.netflix.titus.api.jobmanager.model.job.Task in project titus-control-plane by Netflix.

the class TaskRelocationLimitController method consume.

/**
 * Decides whether the task with the given id may be evicted under the per-task limit.
 *
 * @param taskId id of the task to look up via {@code jobOperations}
 * @return {@code TASK_NOT_FOUND} when no such task exists, {@code taskLimitExceeded} when the
 *         task's eviction resubmit number has reached {@code perTaskLimit}, otherwise an
 *         approved result
 */
@Override
public ConsumptionResult consume(String taskId) {
    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(taskId);
    if (!lookup.isPresent()) {
        return TASK_NOT_FOUND;
    }

    int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
    return resubmits < perTaskLimit ? ConsumptionResult.approved() : taskLimitExceeded;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) Pair(com.netflix.titus.common.util.tuple.Pair)

Example 35 with Task

use of com.netflix.titus.api.jobmanager.model.job.Task in project titus-control-plane by Netflix.

the class UnhealthyTasksLimitTracker method countHealthy.

/**
 * Counts the tasks of {@code job} that are in the {@code Started} state and whose container
 * health state is {@code Healthy}, and builds a message describing every task that is not.
 *
 * <p>The message lists at most {@code TASK_ID_REPORT_LIMIT} task entries. When more tasks are
 * not started or unhealthy, the list is cut off there and a single
 * {@code ",... dropped N tasks]"} suffix reports how many entries were omitted.
 *
 * @return a pair of (healthy task count, message). The message is {@code "job not found"} (with
 *         count 0) when the task lookup throws {@code JobManagerException}; a per-task report
 *         when at least one task is not started or not healthy; an empty string when all tasks
 *         are healthy and the count exceeds {@code minimumHealthyCount}; otherwise a
 *         "not enough healthy containers" explanation
 */
private Pair<Integer, String> countHealthy() {
    List<Task> tasks;
    try {
        tasks = jobOperations.getTasks(job.getId());
    } catch (JobManagerException e) {
        return Pair.of(0, "job not found");
    }
    int healthy = 0;
    // Task id -> reason the task does not count as healthy.
    Map<String, String> notStartedOrUnhealthyTasks = new HashMap<>();
    for (Task task : tasks) {
        if (task.getStatus().getState() == TaskState.Started) {
            Optional<ContainerHealthStatus> statusOpt = containerHealthService.findHealthStatus(task.getId());
            if (statusOpt.isPresent() && statusOpt.get().getState() == ContainerHealthState.Healthy) {
                healthy++;
            } else {
                String report = statusOpt.map(status -> startWithLowercase(status.getState().name()) + '(' + status.getReason() + ')').orElse("health not found");
                notStartedOrUnhealthyTasks.put(task.getId(), report);
            }
        } else {
            notStartedOrUnhealthyTasks.put(task.getId(), String.format("Not started (current task state=%s)", task.getStatus().getState()));
        }
    }
    if (!notStartedOrUnhealthyTasks.isEmpty()) {
        int total = notStartedOrUnhealthyTasks.size();
        StringBuilder builder = new StringBuilder("not started and healthy: ");
        builder.append("total=").append(total);
        builder.append(", tasks=[");
        int counter = 0;
        for (Map.Entry<String, String> entry : notStartedOrUnhealthyTasks.entrySet()) {
            builder.append(entry.getKey()).append('=').append(entry.getValue());
            counter++;
            if (counter == total) {
                // Last entry: close the list normally.
                builder.append("]");
                break;
            }
            if (counter >= TASK_ID_REPORT_LIMIT) {
                // Report limit reached with entries remaining: summarize the rest once and stop.
                // Without the break every remaining entry (and another "dropped" fragment) would
                // still be appended, so the message would not actually be truncated.
                builder.append(",... dropped ").append(total - counter).append(" tasks]");
                break;
            }
            builder.append(", ");
        }
        return Pair.of(healthy, builder.toString());
    }
    return Pair.of(healthy, healthy > minimumHealthyCount ? "" : String.format("not enough healthy containers: healthy=%s, minimum=%s", healthy, minimumHealthyCount));
}
Also used : Job(com.netflix.titus.api.jobmanager.model.job.Job) Task(com.netflix.titus.api.jobmanager.model.job.Task) EvictionQuota(com.netflix.titus.api.eviction.model.EvictionQuota) HashMap(java.util.HashMap) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Reference(com.netflix.titus.api.model.reference.Reference) UnhealthyTasksLimitDisruptionBudgetPolicy(com.netflix.titus.api.jobmanager.model.job.disruptionbudget.UnhealthyTasksLimitDisruptionBudgetPolicy) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) ContainerHealthState(com.netflix.titus.api.containerhealth.model.ContainerHealthState) List(java.util.List) ContainerHealthStatus(com.netflix.titus.api.containerhealth.model.ContainerHealthStatus) AvailabilityPercentageLimitDisruptionBudgetPolicy(com.netflix.titus.api.jobmanager.model.job.disruptionbudget.AvailabilityPercentageLimitDisruptionBudgetPolicy) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) Pair(com.netflix.titus.common.util.tuple.Pair) QuotaTracker(com.netflix.titus.master.eviction.service.quota.QuotaTracker) Map(java.util.Map) Optional(java.util.Optional) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) StringExt.startWithLowercase(com.netflix.titus.common.util.StringExt.startWithLowercase) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ContainerHealthService(com.netflix.titus.api.containerhealth.service.ContainerHealthService) Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) ContainerHealthStatus(com.netflix.titus.api.containerhealth.model.ContainerHealthStatus) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

Task (com.netflix.titus.api.jobmanager.model.job.Task)222 Test (org.junit.Test)98 ArrayList (java.util.ArrayList)63 List (java.util.List)62 Job (com.netflix.titus.api.jobmanager.model.job.Job)58 BatchJobTask (com.netflix.titus.api.jobmanager.model.job.BatchJobTask)45 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)45 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)42 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)38 BatchJobExt (com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt)34 Pair (com.netflix.titus.common.util.tuple.Pair)32 V1Pod (io.kubernetes.client.openapi.models.V1Pod)32 V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations)31 ServiceJobTask (com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)29 Optional (java.util.Optional)27 Collections (java.util.Collections)26 Collectors (java.util.stream.Collectors)25 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)24 HashMap (java.util.HashMap)24 TaskUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)23