use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.
the class KubeNotificationProcessor method fillInMissingStates.
/**
 * Back-fills status history entries (Launched, StartInitiated, Started) that are absent
 * from a task whose current state is already Started or Finished.
 *
 * <p>The container start time reported by the pod is used as the timestamp of every
 * back-filled entry. Tasks in any other state, pods without a container state, and
 * Started tasks without a known running start time are returned unchanged. A Finished
 * task whose container has no terminated start time is delegated to
 * {@code fillInMissingStatesForContainerSetupFailure}.
 *
 * @param podWrapper wrapper around the pod the container state is read from
 * @param task       task whose status history is inspected
 * @return the same task instance if nothing was added, otherwise a copy with the merged
 *         status history ordered by the natural ordering of each entry's state
 */
private static Task fillInMissingStates(PodWrapper podWrapper, Task task) {
    TaskState taskState = task.getStatus().getState();
    boolean started = taskState == TaskState.Started;
    if (!started && taskState != TaskState.Finished) {
        return task;
    }

    V1ContainerState container = podWrapper.findContainerState().orElse(null);
    if (container == null) {
        return task;
    }

    long startedAtMs;
    if (started) {
        boolean runningStartKnown = container.getRunning() != null
                && container.getRunning().getStartedAt() != null;
        if (!runningStartKnown) {
            return task;
        }
        startedAtMs = container.getRunning().getStartedAt().toInstant().toEpochMilli();
    } else {
        // Only TaskState.Finished can reach this branch.
        boolean terminatedStartKnown = container.getTerminated() != null
                && container.getTerminated().getStartedAt() != null;
        if (!terminatedStartKnown) {
            // No start time on a terminated container: treated as a container setup failure.
            return fillInMissingStatesForContainerSetupFailure(podWrapper, task);
        }
        startedAtMs = container.getTerminated().getStartedAt().toInstant().toEpochMilli();
    }

    TaskStatus.Builder template = TaskStatus.newBuilder()
            .withReasonCode(TaskStatus.REASON_STATE_MISSING)
            .withReasonMessage("Filled in missing state update that was missed previously")
            .withTimestamp(startedAtMs);

    // The states are probed in lifecycle order; each probe contributes at most one entry.
    List<TaskStatus> backfilled = new ArrayList<>();
    for (TaskState expected : new TaskState[]{TaskState.Launched, TaskState.StartInitiated, TaskState.Started}) {
        addIfMissing(task, expected, template).ifPresent(backfilled::add);
    }
    if (backfilled.isEmpty()) {
        return task;
    }

    List<TaskStatus> mergedHistory = new ArrayList<>(task.getStatusHistory());
    mergedHistory.addAll(backfilled);
    mergedHistory.sort(Comparator.comparing(ExecutableStatus::getState));
    return task.toBuilder().withStatusHistory(mergedHistory).build();
}
use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.
the class KubeNotificationProcessor method handlePodUpdatedEvent.
/**
 * Translates a pod notification into a task status update and, when the update produces
 * a result for the supplied task, submits it to the job service.
 *
 * @param event pod notification; only updated/deleted events carry node information
 * @param job   job owning the task (not read by this method)
 * @param task  task snapshot the notification refers to
 * @return an empty {@code Mono} when the event is ignored, otherwise the {@code Mono}
 *         wrapping the job service task update
 */
private Mono<Void> handlePodUpdatedEvent(PodEvent event, Job job, Task task) {
    // Basic sanity check: a pod without status or phase points at a serious pod state problem.
    boolean podHasPhase = event.getPod() != null
            && event.getPod().getStatus() != null
            && event.getPod().getStatus().getPhase() != null;
    if (!podHasPhase) {
        logger.warn("Pod notification with pod without status or phase set: taskId={}, pod={}", task.getId(), event.getPod());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    PodWrapper podWrapper = new PodWrapper(event.getPod());
    boolean podDeleted = event instanceof PodDeletedEvent;

    // Single assignment keeps 'node' effectively final for the lambda below.
    Optional<V1Node> node = event instanceof PodUpdatedEvent
            ? ((PodUpdatedEvent) event).getNode()
            : (podDeleted ? ((PodDeletedEvent) event).getNode() : Optional.<V1Node>empty());

    PodToTaskMapper mapper = new PodToTaskMapper(podWrapper, node, task, podDeleted, containerResultCodeResolver, titusRuntime);
    Either<TaskStatus, String> mapped = mapper.getNewTaskStatus();
    if (mapped.hasError()) {
        logger.info(mapped.getError());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    TaskStatus newTaskStatus = mapped.getValue();
    if (!TaskStatus.areEquivalent(task.getStatus(), newTaskStatus)) {
        logger.info("Pod notification changes task status: taskId={}, fromStatus={}, toStatus={}, eventSequenceNumber={}", task.getId(), task.getStatus(), newTaskStatus, event.getSequenceNumber());
    } else {
        logger.info("Pod change notification does not change task status: taskId={}, status={}, eventSequenceNumber={}", task.getId(), newTaskStatus, event.getSequenceNumber());
    }

    // Evaluate the update against the task snapshot we hold first; an empty result
    // (presumably "nothing to change" - confirm in updateTaskStatus) ends processing here.
    // Otherwise the same update is re-applied inside the job service change action
    // against the most up to date task version.
    boolean producesUpdate = updateTaskStatus(podWrapper, newTaskStatus, node, task, true).isPresent();
    if (!producesUpdate) {
        return Mono.empty();
    }

    String summary = "Pod status updated from kubernetes node (k8phase='" + event.getPod().getStatus().getPhase()
            + "', taskState=" + task.getStatus().getState() + ")";
    return ReactorExt.toMono(v3JobOperations.updateTask(
            task.getId(),
            current -> updateTaskStatus(podWrapper, newTaskStatus, node, current, false),
            V3JobOperations.Trigger.Kube,
            summary,
            KUBE_CALL_METADATA
    ));
}
use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.
the class DifferenceResolverUtils method countActiveNotStartedTasks.
/**
 * Counts the distinct task ids, across the children of both holders, whose task state is
 * neither Started nor Finished.
 *
 * @param refJobHolder     first job holder whose children are inspected
 * @param runningJobHolder second job holder whose children are inspected
 * @return number of unique child ids in a state other than Started or Finished; an id
 *         present under both holders is counted once
 */
public static int countActiveNotStartedTasks(EntityHolder refJobHolder, EntityHolder runningJobHolder) {
    Set<String> notStartedIds = new HashSet<>();
    // Reference holder first, then running holder; the set removes duplicates.
    for (EntityHolder jobHolder : new EntityHolder[]{refJobHolder, runningJobHolder}) {
        jobHolder.getChildren().forEach(child -> {
            Task childTask = child.getEntity();
            TaskState childState = childTask.getStatus().getState();
            boolean startedOrFinished = childState == TaskState.Started || childState == TaskState.Finished;
            if (!startedOrFinished) {
                notStartedIds.add(child.getId());
            }
        });
    }
    return notStartedIds.size();
}
use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.
the class TaskTimeoutChangeActions method getTimeoutStatus.
/**
 * Determines the timeout status of a task from the deadline attribute stored on its holder.
 *
 * @param taskHolder holder of the task; its attributes are looked up under the
 *                   {@code STATE_TAGS} entry for the task's current state
 * @param clock      source of the current wall time
 * @return {@code Ignore} when the task is not in Launched, StartInitiated or KillInitiated;
 *         {@code NotSet} when no deadline attribute exists; {@code Pending} while the wall
 *         time is before the deadline; {@code TimedOut} otherwise
 */
public static TimeoutStatus getTimeoutStatus(EntityHolder taskHolder, Clock clock) {
    Task task = taskHolder.getEntity();
    TaskState state = task.getStatus().getState();

    boolean timeoutTracked = state == TaskState.Launched
            || state == TaskState.StartInitiated
            || state == TaskState.KillInitiated;
    if (!timeoutTracked) {
        return TimeoutStatus.Ignore;
    }

    Object rawDeadline = taskHolder.getAttributes().get(STATE_TAGS.get(state));
    if (rawDeadline == null) {
        return TimeoutStatus.NotSet;
    }

    long deadline = (Long) rawDeadline;
    if (clock.wallTime() < deadline) {
        return TimeoutStatus.Pending;
    }
    return TimeoutStatus.TimedOut;
}
use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.
the class TaskTimeoutChangeActions method setTimeout.
/**
 * Builds a change action that tags a task holder with the deadline for the given state.
 *
 * <p>The deadline is the wall time, read when the model update runs, plus {@code timeoutMs},
 * stored under the {@code STATE_TAGS} entry for {@code taskState}. For
 * {@code KillInitiated}, the {@code KILL_INITIATED_ATTEMPT_TAG} is additionally set to 0.
 *
 * @param taskId    id of the task to tag
 * @param taskState state the timeout applies to; rejected by
 *                  {@code Preconditions.checkArgument} if it has no {@code STATE_TAGS} entry
 * @param timeoutMs timeout duration in milliseconds, added to the wall time
 * @param clock     source of the current wall time
 * @return the configured "setTimeout" change action
 */
public static TitusChangeAction setTimeout(String taskId, TaskState taskState, long timeoutMs, Clock clock) {
    String tagName = STATE_TAGS.get(taskState);
    Preconditions.checkArgument(tagName != null, "Timeout not tracked for state %s", taskState);

    return TitusChangeAction.newAction("setTimeout")
            .id(taskId)
            .trigger(Trigger.Reconciler)
            .summary("Setting timeout for task in state %s: %s", taskState, DateTimeExt.toTimeUnitString(timeoutMs))
            .callMetadata(JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason("configure timeout").build())
            .applyModelUpdate(self -> ModelActionHolder.running(
                    TitusModelAction.newModelUpdate(self).taskMaybeUpdate(jobHolder ->
                            jobHolder.findById(taskId).map(current -> {
                                EntityHolder withDeadline = current.addTag(tagName, clock.wallTime() + timeoutMs);
                                // Kill attempts are counted from zero once the kill timeout is armed.
                                EntityHolder updated = taskState == TaskState.KillInitiated
                                        ? withDeadline.addTag(KILL_INITIATED_ATTEMPT_TAG, 0)
                                        : withDeadline;
                                return Pair.of(jobHolder.addChild(updated), updated);
                            }))
            ));
}
Aggregations