Use of com.netflix.titus.api.jobmanager.model.job.TaskStatus in the Netflix project titus-control-plane.
From class KubeNotificationProcessor, method fillInMissingStates.
/**
 * Back-fills status history entries that were never recorded for a task that is already in the
 * {@code Started} or {@code Finished} state.
 * <p>
 * The container start time reported by the pod is used as the timestamp of every back-filled entry.
 * The task is returned untouched when it is in any other state, when the pod exposes no container state,
 * when a {@code Started} task has no running-container start time, or when no entry had to be added.
 * A {@code Finished} task whose terminated container carries no start time is handed over to
 * {@code fillInMissingStatesForContainerSetupFailure}.
 *
 * @param podWrapper wrapper around the pod backing the task; source of the container state
 * @param task       task whose status history is inspected
 * @return the given task if nothing was added, otherwise a rebuilt task with the merged status history
 */
private static Task fillInMissingStates(PodWrapper podWrapper, Task task) {
    TaskState taskState = task.getStatus().getState();
    boolean isStarted = taskState == TaskState.Started;
    if (!isStarted && taskState != TaskState.Finished) {
        return task;
    }

    V1ContainerState container = podWrapper.findContainerState().orElse(null);
    if (container == null) {
        return task;
    }

    long containerStartMs;
    if (!isStarted) {
        // TaskState.Finished
        boolean hasTerminatedStart = container.getTerminated() != null
                && container.getTerminated().getStartedAt() != null;
        if (!hasTerminatedStart) {
            // It must be the case where the container setup failed.
            return fillInMissingStatesForContainerSetupFailure(podWrapper, task);
        }
        containerStartMs = container.getTerminated().getStartedAt().toInstant().toEpochMilli();
    } else {
        // TaskState.Started
        boolean hasRunningStart = container.getRunning() != null
                && container.getRunning().getStartedAt() != null;
        if (!hasRunningStart) {
            return task;
        }
        containerStartMs = container.getRunning().getStartedAt().toInstant().toEpochMilli();
    }

    TaskStatus.Builder backfillTemplate = TaskStatus.newBuilder()
            .withReasonCode(TaskStatus.REASON_STATE_MISSING)
            .withReasonMessage("Filled in missing state update that was missed previously")
            .withTimestamp(containerStartMs);

    // States probed for a missing history entry, in the order they are checked.
    // NOTE(review): addIfMissing presumably yields a status only when the task history lacks that state.
    TaskState[] expectedStates = {TaskState.Launched, TaskState.StartInitiated, TaskState.Started};
    List<TaskStatus> backfilled = new ArrayList<>();
    for (TaskState expected : expectedStates) {
        addIfMissing(task, expected, backfillTemplate).ifPresent(backfilled::add);
    }
    if (backfilled.isEmpty()) {
        return task;
    }

    // Merge the back-filled entries with the existing history and order everything by state,
    // using the natural ordering of the state type.
    List<TaskStatus> mergedHistory = new ArrayList<>(task.getStatusHistory());
    mergedHistory.addAll(backfilled);
    mergedHistory.sort(Comparator.comparing(ExecutableStatus::getState));
    return task.toBuilder().withStatusHistory(mergedHistory).build();
}
Use of com.netflix.titus.api.jobmanager.model.job.TaskStatus in the Netflix project titus-control-plane.
From class KubeNotificationProcessor, method handlePodUpdatedEvent.
/**
 * Maps a pod notification onto a new task status and, if that changes the task, submits the change
 * through {@code v3JobOperations.updateTask}.
 * <p>
 * An empty {@link Mono} is returned without submitting anything when the pod has no status or phase,
 * when {@code PodToTaskMapper} reports an error, or when {@code updateTaskStatus} produces no result
 * for the task version passed in.
 *
 * @param event pod notification; the node is taken from it for {@code PodUpdatedEvent} and
 *              {@code PodDeletedEvent}, and is empty for any other event type
 * @param job   job owning the task (not read by this method)
 * @param task  task version known to the caller when the notification arrived
 * @return an empty {@link Mono} if nothing is submitted, otherwise the task update wrapped by
 *         {@code ReactorExt.toMono}
 */
private Mono<Void> handlePodUpdatedEvent(PodEvent event, Job job, Task task) {
    // This is basic sanity check. If it fails, we have a major problem with pod state.
    boolean podHasPhase = event.getPod() != null
            && event.getPod().getStatus() != null
            && event.getPod().getStatus().getPhase() != null;
    if (!podHasPhase) {
        logger.warn("Pod notification with pod without status or phase set: taskId={}, pod={}", task.getId(), event.getPod());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    PodWrapper podWrapper = new PodWrapper(event.getPod());
    boolean podDeleted = event instanceof PodDeletedEvent;

    // Assigned exactly once on every path so that it can be captured by the lambda below.
    Optional<V1Node> node;
    if (event instanceof PodUpdatedEvent) {
        node = ((PodUpdatedEvent) event).getNode();
    } else if (podDeleted) {
        node = ((PodDeletedEvent) event).getNode();
    } else {
        node = Optional.empty();
    }

    PodToTaskMapper mapper = new PodToTaskMapper(
            podWrapper, node, task, podDeleted, containerResultCodeResolver, titusRuntime
    );
    Either<TaskStatus, String> mappingResult = mapper.getNewTaskStatus();
    if (mappingResult.hasError()) {
        logger.info(mappingResult.getError());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    TaskStatus mappedStatus = mappingResult.getValue();
    if (!TaskStatus.areEquivalent(task.getStatus(), mappedStatus)) {
        logger.info("Pod notification changes task status: taskId={}, fromStatus={}, toStatus={}, eventSequenceNumber={}", task.getId(), task.getStatus(), mappedStatus, event.getSequenceNumber());
    } else {
        logger.info("Pod change notification does not change task status: taskId={}, status={}, eventSequenceNumber={}", task.getId(), mappedStatus, event.getSequenceNumber());
    }

    // Evaluate the update early, against the task version we were given. If it yields nothing there is
    // no need to create a change action. Otherwise the update is evaluated again inside the change
    // action, against the most up to date task version. The two calls differ only in the task they
    // receive and in the trailing boolean flag (true here, false in the change action).
    boolean changesTask = updateTaskStatus(podWrapper, mappedStatus, node, task, true).isPresent();
    if (!changesTask) {
        return Mono.empty();
    }

    String updateReason = "Pod status updated from kubernetes node (k8phase='" + event.getPod().getStatus().getPhase() + "', taskState=" + task.getStatus().getState() + ")";
    return ReactorExt.toMono(v3JobOperations.updateTask(
            task.getId(),
            current -> updateTaskStatus(podWrapper, mappedStatus, node, current, false),
            V3JobOperations.Trigger.Kube,
            updateReason,
            KUBE_CALL_METADATA
    ));
}
Use of com.netflix.titus.api.jobmanager.model.job.TaskStatus in the Netflix project titus-control-plane.
From class DifferenceResolverUtils, method shouldRetry.
/**
 * Decides whether a task should be retried.
 * <p>
 * A retry is only considered when the task is {@code Finished}, the job is {@code Accepted} and
 * {@code hasReachedRetryLimit} is false. Under those conditions a non-batch job always retries.
 * A batch job does not retry when the task's {@code KillInitiated} status carries
 * {@code REASON_RUNTIME_LIMIT_EXCEEDED} and the batch extension disables retry on runtime limit;
 * otherwise it retries unless the task's current reason code is {@code REASON_NORMAL}.
 *
 * @param job  job owning the task
 * @param task task to evaluate
 * @return {@code true} if the task should be retried, {@code false} otherwise
 */
public static boolean shouldRetry(Job<?> job, Task task) {
    TaskStatus currentStatus = task.getStatus();

    // The job status is only consulted once the task is known to be finished.
    boolean retryCandidate = currentStatus.getState() == TaskState.Finished
            && job.getStatus().getState() == JobState.Accepted;
    if (!retryCandidate || hasReachedRetryLimit(job, task)) {
        return false;
    }

    if (isBatch(job)) {
        String killReason = JobFunctions.findTaskStatus(task, TaskState.KillInitiated)
                .map(ExecutableStatus::getReasonCode)
                .orElse("N/A");
        boolean killedOnRuntimeLimit = TaskStatus.REASON_RUNTIME_LIMIT_EXCEEDED.equals(killReason);

        // The extensions are only cast to the batch type when the runtime limit was the kill reason.
        if (killedOnRuntimeLimit
                && !((BatchJobExt) job.getJobDescriptor().getExtensions()).isRetryOnRuntimeLimit()) {
            return false;
        }
        return !TaskStatus.REASON_NORMAL.equals(currentStatus.getReasonCode());
    }

    // Not a batch job.
    return true;
}
Use of com.netflix.titus.api.jobmanager.model.job.TaskStatus in the Netflix project titus-control-plane.
From class PodToTaskMapperTest, method testTaskStateAheadOfPodInRunningState.
/**
 * A task in {@code KillInitiated} combined with a scheduled, running pod in the "Running" phase:
 * the mapper result is checked for the "not consistent with the task state" error message.
 */
@Test
public void testTaskStateAheadOfPodInRunningState() {
    // Arrange: the task is further along than what the pod reports.
    Task killInitiatedTask = newTask(TaskState.KillInitiated);
    V1Pod runningPod = newPod(
            andPhase("Running"),
            andScheduled(),
            andRunning(),
            andReason(TaskStatus.REASON_NORMAL)
    );

    // Act
    Either<TaskStatus, String> mapped = updateMapper(killInitiatedTask, runningPod).getNewTaskStatus();

    // Assert
    assertErrorMessage(mapped, "pod state (Running) not consistent with the task state");
}
Use of com.netflix.titus.api.jobmanager.model.job.TaskStatus in the Netflix project titus-control-plane.
From class PodToTaskMapperTest, method testUpdatesIgnoredWhenTaskFinished.
/**
 * A task already in {@code Finished} combined with a pod in the "Pending" phase:
 * the mapper result is checked for the "task already marked as finished" error message.
 */
@Test
public void testUpdatesIgnoredWhenTaskFinished() {
    // Arrange
    Task finishedTask = newTask(TaskState.Finished);
    V1Pod pendingPod = newPod(andPhase("Pending"));

    // Act
    Either<TaskStatus, String> mapped = updateMapper(finishedTask, pendingPod).getNewTaskStatus();

    // Assert
    assertErrorMessage(mapped, "task already marked as finished");
}
Aggregations