use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.
the class JobReconciliationFrameworkFactory method loadJobsAndTasksFromStore.
/**
 * Initializes the store, then loads every job that is not in the Finished state together with its tasks.
 * <p>
 * Jobs rejected by {@code validateJob} are reported to the error collector and skipped. The corrupted-record
 * values returned by the store (for jobs and for tasks) are forwarded to the error collector as well.
 * The {@code loadedJobs}, {@code loadedTasks} and {@code storeLoadTimeMs} holders are updated as a side effect;
 * the load time is recorded even when loading fails.
 *
 * @param errorCollector sink for corrupted-record counts and ids of jobs that failed validation
 * @return one pair per loaded job: the validated job and the tasks retrieved for it
 * @throws IllegalStateException if anything fails while reading from the store (the cause is preserved)
 */
private List<Pair<Job, List<Task>>> loadJobsAndTasksFromStore(InitializationErrorCollector errorCollector) {
    long loadStartedAt = clock.wallTime();

    // Each element: (job, (tasks of that job, value later summed into the corrupted task record count)).
    List<Pair<Job, Pair<List<Task>, Integer>>> loaded;
    try {
        loaded = store.init()
                .andThen(store.retrieveJobs().flatMap(jobsAndErrors -> {
                    errorCollector.corruptedJobRecords(jobsAndErrors.getRight());

                    List<Job<?>> storedJobs = jobsAndErrors.getLeft();
                    List<Observable<Pair<Job, Pair<List<Task>, Integer>>>> taskQueries = new ArrayList<>();
                    for (Job storedJob : storedJobs) {
                        // TODO Finished jobs that were not archived immediately should be archived by background archive process
                        if (storedJob.getStatus().getState() == JobState.Finished) {
                            logger.info("Not loading finished job: {}", storedJob.getId());
                            continue;
                        }

                        Optional<Job> validated = validateJob(storedJob);
                        if (!validated.isPresent()) {
                            errorCollector.invalidJob(storedJob.getId());
                            continue;
                        }

                        // Tasks are looked up by the stored job id, but paired with the validated job instance.
                        Observable<Pair<Job, Pair<List<Task>, Integer>>> taskQuery = store.retrieveTasksForJob(storedJob.getId())
                                .map(tasksAndErrors -> new Pair<>(validated.get(), tasksAndErrors));
                        taskQueries.add(taskQuery);
                    }
                    // Run the per-job task queries with bounded concurrency.
                    return Observable.merge(taskQueries, MAX_RETRIEVE_TASK_CONCURRENCY);
                }))
                .toList()
                .toBlocking()
                .singleOrDefault(Collections.emptyList());

        int corruptedTaskRecords = 0;
        for (Pair<Job, Pair<List<Task>, Integer>> entry : loaded) {
            corruptedTaskRecords += entry.getRight().getRight();
        }
        errorCollector.corruptedTaskRecords(corruptedTaskRecords);

        int taskCount = loaded.stream().mapToInt(entry -> entry.getRight().getLeft().size()).sum();
        loadedJobs.set(loaded.size());
        loadedTasks.set(taskCount);

        for (Pair<Job, Pair<List<Task>, Integer>> entry : loaded) {
            List<String> taskSummaries = new ArrayList<>();
            for (Task task : entry.getRight().getLeft()) {
                taskSummaries.add(String.format("<%s,ks:%s>", task.getId(), task.getStatus().getState()));
            }
            logger.info("Loaded job: {} with tasks: {}", entry.getLeft().getId(), taskSummaries);
        }
        logger.info("{} jobs and {} tasks loaded from store in {}ms", loaded.size(), taskCount, clock.wallTime() - loadStartedAt);
    } catch (Exception e) {
        logger.error("Failed to load jobs from the store during initialization:", e);
        throw new IllegalStateException("Failed to load jobs from the store during initialization", e);
    } finally {
        // Recorded on both the success and the failure path.
        storeLoadTimeMs.set(clock.wallTime() - loadStartedAt);
    }

    // Drop the per-job error value; callers only need (job, tasks).
    return loaded.stream()
            .map(entry -> Pair.of(entry.getLeft(), entry.getRight().getLeft()))
            .collect(Collectors.toList());
}
use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.
the class ObserveJobsSubscription method createJobsSnapshot.
/**
 * Builds a snapshot of change notifications for the jobs and tasks currently returned by the job operations.
 * <p>
 * For each job, a job notification is added first (if the job predicate accepts the job with its task list),
 * followed by a notification for every task of that job accepted by the task predicate. Tasks are evaluated
 * regardless of whether their job passed the job predicate.
 *
 * @param jobsPredicate  filter applied to each (job, tasks) pair
 * @param tasksPredicate filter applied to each (job, task) pair
 * @return notifications in job order, all stamped with the same wall-clock time
 */
private List<JobChangeNotification> createJobsSnapshot(Predicate<Pair<Job<?>, List<Task>>> jobsPredicate, Predicate<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, com.netflix.titus.api.jobmanager.model.job.Task>> tasksPredicate) {
    long snapshotTime = titusRuntime.getClock().wallTime();
    List<JobChangeNotification> notifications = new ArrayList<>();

    // Generics casting issue: go through a raw List to obtain the wildcard-typed view used below.
    List rawJobsAndTasks = context.getJobOperations().getJobsAndTasks();
    List<Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>>> jobsAndTasks = rawJobsAndTasks;

    for (Pair<com.netflix.titus.api.jobmanager.model.job.Job<?>, List<com.netflix.titus.api.jobmanager.model.job.Task>> jobAndTasks : jobsAndTasks) {
        com.netflix.titus.api.jobmanager.model.job.Job<?> currentJob = jobAndTasks.getLeft();
        List<com.netflix.titus.api.jobmanager.model.job.Task> currentTasks = jobAndTasks.getRight();

        if (jobsPredicate.test(jobAndTasks)) {
            notifications.add(context.toJobChangeNotification(currentJob, snapshotTime));
        }

        for (com.netflix.titus.api.jobmanager.model.job.Task currentTask : currentTasks) {
            if (tasksPredicate.test(Pair.of(currentJob, currentTask))) {
                notifications.add(context.toJobChangeNotification(currentTask, snapshotTime));
            }
        }
    }
    return notifications;
}
use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.
the class TaskRelocationLimitController method getTaskQuota.
/**
 * Computes the eviction quota for a single task based on its eviction resubmit number.
 *
 * @param taskReference reference whose name is used as the task id
 * @return quota of 1 while the task's eviction resubmit number is below {@code perTaskLimit};
 * quota of 0 when the task cannot be found or the limit has been reached
 */
private EvictionQuota getTaskQuota(Reference taskReference) {
    String taskId = taskReference.getName();
    EvictionQuota.Builder quotaBuilder = EvictionQuota.newBuilder().withReference(taskReference);

    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(taskId);
    if (!lookup.isPresent()) {
        return quotaBuilder.withQuota(0).withMessage("Task not found").build();
    }

    int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
    boolean limitReached = resubmits >= perTaskLimit;
    if (limitReached) {
        // Reuse the rejection reason carried by the shared "limit exceeded" consumption result.
        return quotaBuilder.withQuota(0).withMessage(taskLimitExceeded.getRejectionReason().get()).build();
    }
    return quotaBuilder.withQuota(1).withMessage("Per task limit is %s, and restart count is %s", perTaskLimit, resubmits).build();
}
use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.
the class TaskRelocationLimitController method consume.
/**
 * Decides whether one more eviction of the given task is allowed.
 *
 * @param taskId id of the task to be evicted
 * @return {@code TASK_NOT_FOUND} if the task is unknown, {@code taskLimitExceeded} once the task's
 * eviction resubmit number has reached {@code perTaskLimit}, otherwise an approved result
 */
@Override
public ConsumptionResult consume(String taskId) {
    Optional<Pair<Job<?>, Task>> lookup = jobOperations.findTaskById(taskId);
    if (lookup.isPresent()) {
        int resubmits = lookup.get().getRight().getEvictionResubmitNumber();
        return resubmits < perTaskLimit ? ConsumptionResult.approved() : taskLimitExceeded;
    }
    return TASK_NOT_FOUND;
}
use of com.netflix.titus.common.util.tuple.Pair in project titus-control-plane by Netflix.
the class UnhealthyTasksLimitTracker method countHealthy.
/**
 * Counts the tasks of the tracked job that are in the Started state and whose container health
 * status is Healthy.
 * <p>
 * When at least one task is not started or not healthy, the returned message lists those tasks as
 * {@code taskId=reason}. At most {@code TASK_ID_REPORT_LIMIT} tasks are listed (always at least one);
 * the remainder is summarized as {@code ",... dropped N tasks]"}. The tasks are collected in a
 * {@link HashMap}, so which of them are listed is not ordered in any particular way.
 *
 * @return pair of (healthy task count, diagnostic message). The message is empty only when every task is
 * healthy and the healthy count is strictly greater than {@code minimumHealthyCount}. If the job's tasks
 * cannot be retrieved, {@code (0, "job not found")} is returned.
 */
private Pair<Integer, String> countHealthy() {
    List<Task> tasks;
    try {
        tasks = jobOperations.getTasks(job.getId());
    } catch (JobManagerException e) {
        // Deliberate best-effort: a failed task lookup is reported as zero healthy tasks.
        return Pair.of(0, "job not found");
    }

    int healthy = 0;
    Map<String, String> notStartedOrUnhealthyTasks = new HashMap<>();
    for (Task task : tasks) {
        if (task.getStatus().getState() == TaskState.Started) {
            Optional<ContainerHealthStatus> statusOpt = containerHealthService.findHealthStatus(task.getId());
            if (statusOpt.isPresent() && statusOpt.get().getState() == ContainerHealthState.Healthy) {
                healthy++;
            } else {
                String report = statusOpt.map(status -> startWithLowercase(status.getState().name()) + '(' + status.getReason() + ')').orElse("health not found");
                notStartedOrUnhealthyTasks.put(task.getId(), report);
            }
        } else {
            notStartedOrUnhealthyTasks.put(task.getId(), String.format("Not started (current task state=%s)", task.getStatus().getState()));
        }
    }

    if (!notStartedOrUnhealthyTasks.isEmpty()) {
        int total = notStartedOrUnhealthyTasks.size();
        StringBuilder builder = new StringBuilder("not started and healthy: ");
        builder.append("total=").append(total);
        builder.append(", tasks=[");
        int counter = 0;
        for (Map.Entry<String, String> entry : notStartedOrUnhealthyTasks.entrySet()) {
            builder.append(entry.getKey()).append('=').append(entry.getValue());
            counter++;
            if (counter == total) {
                // Last entry: close the list.
                builder.append("]");
            } else if (counter >= TASK_ID_REPORT_LIMIT) {
                // Report limit reached with entries remaining: summarize the rest and stop.
                // Without leaving the loop here, the remaining entries would still be appended and the
                // "dropped" marker repeated on every iteration.
                builder.append(",... dropped ").append(total - counter).append(" tasks]");
                break;
            } else {
                builder.append(", ");
            }
        }
        return Pair.of(healthy, builder.toString());
    }

    // Exactly minimumHealthyCount healthy tasks is still reported as "not enough" (strict comparison).
    return Pair.of(healthy, healthy > minimumHealthyCount ? "" : String.format("not enough healthy containers: healthy=%s, minimum=%s", healthy, minimumHealthyCount));
}
Aggregations