Search in sources :

Example 16 with TaskState

use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.

the class TaskScenarioBuilder method expectStateUpdateSkipOther.

/**
 * Expects a task update whose state equals the given state, logging the time it took to arrive.
 * <p>
 * The expected state is converted with {@code toCoreTaskState} and handed to
 * {@code expectTaskUpdate} together with two predicates: one matching tasks whose state ordinal
 * is lower than the expected one, and one matching tasks that are exactly in the expected state.
 * NOTE(review): judging by the method name, the first predicate presumably marks updates that
 * are skipped - confirm against {@code expectTaskUpdate}.
 *
 * @param expectedState the task state to wait for
 * @return this builder, to allow call chaining
 */
public TaskScenarioBuilder expectStateUpdateSkipOther(TaskStatus.TaskState expectedState) {
    logger.info("[{}] Expecting task state {} (skipping other)...", discoverActiveTest(), expectedState);
    Stopwatch timer = Stopwatch.createStarted();

    TaskState coreState = toCoreTaskState(expectedState);
    expectTaskUpdate(
            candidate -> candidate.getStatus().getState().ordinal() < coreState.ordinal(),
            candidate -> candidate.getStatus().getState() == coreState,
            "Expected state: " + coreState
    );

    logger.info(
            "[{}] Expected task state {} received in {}[ms]",
            discoverActiveTest(),
            expectedState,
            timer.elapsed(TimeUnit.MILLISECONDS)
    );
    return this;
}
Also used : Stopwatch(com.google.common.base.Stopwatch) GrpcJobManagementModelConverters.toCoreTaskState(com.netflix.titus.runtime.endpoint.v3.grpc.GrpcJobManagementModelConverters.toCoreTaskState) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) GrpcJobManagementModelConverters.toGrpcTaskState(com.netflix.titus.runtime.endpoint.v3.grpc.GrpcJobManagementModelConverters.toGrpcTaskState)

Example 17 with TaskState

use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.

the class KubeNotificationProcessorTest method testUpdateTaskStatusVKWithTransitionNetworkMode.

/**
 * Checks the task returned by {@code processor.updateTaskStatus} for a running pod whose
 * annotations declare the {@code Ipv6AndIpv4Fallback} network mode: the status history must
 * contain the intermediate states, and the task context must carry the agent host, the IPv6
 * container addresses and the transition IPv4 address, but no container IPv4 entry.
 */
@Test
public void testUpdateTaskStatusVKWithTransitionNetworkMode() {
    V1Pod pod = newPod(TASK.getId(), andRunning());
    V1Node node = newNode(andIpAddress("2.2.2.2"), andNodeAnnotations(TITUS_NODE_DOMAIN + "ami", "ami123", TITUS_NODE_DOMAIN + "stack", "myStack"));

    Map<String, String> updatedAnnotations = new HashMap<>();
    updatedAnnotations.put(LEGACY_ANNOTATION_IP_ADDRESS, "2001:db8:0:1234:0:567:8:1");
    updatedAnnotations.put(LEGACY_ANNOTATION_ENI_IP_ADDRESS, "192.0.2.1");
    updatedAnnotations.put(LEGACY_ANNOTATION_ENI_IPV6_ADDRESS, "2001:db8:0:1234:0:567:8:1");
    updatedAnnotations.put(LEGACY_ANNOTATION_NETWORK_MODE, NetworkConfiguration.NetworkMode.Ipv6AndIpv4Fallback.toString());
    pod.getMetadata().setAnnotations(updatedAnnotations);

    // Fail with an explicit message when no task is returned, instead of a bare
    // NullPointerException on the first dereference below.
    Task updatedTask = processor.updateTaskStatus(new PodWrapper(pod), TaskStatus.newBuilder().withState(TaskState.Started).build(), Optional.of(node), TASK, false)
            .orElseThrow(() -> new AssertionError("Expected updateTaskStatus to return an updated task"));

    Set<TaskState> pastStates = updatedTask.getStatusHistory().stream().map(ExecutableStatus::getState).collect(Collectors.toSet());
    assertThat(pastStates).contains(TaskState.Accepted, TaskState.Launched, TaskState.StartInitiated);
    assertThat(updatedTask.getTaskContext()).containsEntry(TaskAttributes.TASK_ATTRIBUTES_AGENT_HOST, "2.2.2.2");
    assertThat(updatedTask.getTaskContext()).containsEntry(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "2001:db8:0:1234:0:567:8:1");
    assertThat(updatedTask.getTaskContext()).containsEntry(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IPV6, "2001:db8:0:1234:0:567:8:1");
    // In IPv6 + transition mode, there should *not* be a ipv4. That would be confusing because such a v4 would not
    // be unique to that task, and tools would try to use it, people would try to ssh to it, etc.
    assertThat(updatedTask.getTaskContext()).doesNotContainKey(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IPV4);
    assertThat(updatedTask.getTaskContext()).containsEntry(TaskAttributes.TASK_ATTRIBUTES_TRANSITION_IPV4, "192.0.2.1");
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) V1Node(io.kubernetes.client.openapi.models.V1Node) HashMap(java.util.HashMap) V1Pod(io.kubernetes.client.openapi.models.V1Pod) PodWrapper(com.netflix.titus.master.kubernetes.client.model.PodWrapper) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Test(org.junit.Test)

Example 18 with TaskState

use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.

the class MustBeRelocatedSelfManagedTaskCollectorStep method findAllJobTaskAgentTriples.

/**
 * Collects a (job, task, node) triple for every task that is in the {@code StartInitiated} or
 * {@code Started} state and has an entry in the task-to-node map built by
 * {@code RelocationUtil.buildTasksToInstanceMap}.
 * <p>
 * Tasks in one of those states without a map entry are only reported at debug level.
 *
 * @param nodes the nodes passed on to {@code RelocationUtil.buildTasksToInstanceMap}
 * @return the collected triples, in iteration order of jobs and their tasks
 */
private List<Triple<Job<?>, Task, TitusNode>> findAllJobTaskAgentTriples(Map<String, TitusNode> nodes) {
    Map<String, TitusNode> nodeByTaskId = RelocationUtil.buildTasksToInstanceMap(nodes, jobOperations);
    List<Triple<Job<?>, Task, TitusNode>> triples = new ArrayList<>();

    jobOperations.getJobs().forEach(job -> {
        jobOperations.getTasks(job.getId()).forEach(task -> {
            TaskState state = task.getStatus().getState();
            boolean active = state == TaskState.StartInitiated || state == TaskState.Started;
            if (!active) {
                return;
            }

            TitusNode node = nodeByTaskId.get(task.getId());
            if (node == null) {
                logger.debug("Task in active state with no agent instance: taskId={}, state={}", task.getId(), task.getStatus().getState());
                return;
            }

            triples.add(Triple.of(job, task, node));
        });
    });

    return triples;
}
Also used : Triple(com.netflix.titus.common.util.tuple.Triple) ArrayList(java.util.ArrayList) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState)

Example 19 with TaskState

use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.

the class PodToTaskMapper method handlePodDeleted.

/**
 * Handle pod object deleted event.
 * <p>
 * The result is always a {@code Finished} task status:
 * <ul>
 *     <li>if the task has no {@code KillInitiated} status, the reason code is derived from
 *     {@code REASON_TASK_KILLED} and the message depends on whether the pod reason is {@code NODE_LOST}</li>
 *     <li>otherwise the reason code is chosen from the pod phase and the pod message is used as
 *     the reason message</li>
 * </ul>
 *
 * @return the {@code Finished} task status wrapped as an {@link Either} value
 */
private Either<TaskStatus, String> handlePodDeleted() {
    TaskState taskState = task.getStatus().getState();
    boolean hasKillInitiatedState = JobFunctions.findTaskStatus(task, KillInitiated).isPresent();
    String reason = podWrapper.getReason();
    long now = titusRuntime.getClock().wallTime();

    if (!hasKillInitiatedState) {
        // Both variants share reason code and timestamp; only the message differs.
        String reasonMessage = NODE_LOST.equals(reason)
                ? "The host running the container was unexpectedly terminated"
                : "Container was terminated without going through the Titus API";
        return Either.ofValue(TaskStatus.newBuilder().withState(Finished).withReasonCode(effectiveFinalReasonCode(REASON_TASK_KILLED)).withReasonMessage(reasonMessage).withTimestamp(now).build());
    }

    String reasonCode;
    if (podWrapper.getPodPhase() == PodPhase.PENDING || podWrapper.getPodPhase() == PodPhase.RUNNING) {
        // Pod in running state that did not complete must have been terminated as well.
        // Constant-first comparison: a task status without a reason code must not raise a NullPointerException.
        if (taskState == KillInitiated && REASON_STUCK_IN_STATE.equals(task.getStatus().getReasonCode())) {
            reasonCode = REASON_TRANSIENT_SYSTEM_ERROR;
        } else {
            reasonCode = REASON_TASK_KILLED;
        }
    } else if (podWrapper.getPodPhase() == PodPhase.SUCCEEDED) {
        reasonCode = resolveFinalTaskState(REASON_NORMAL);
    } else if (podWrapper.getPodPhase() == PodPhase.FAILED) {
        reasonCode = resolveFinalTaskState(REASON_FAILED);
    } else {
        // Unmapped pod phase: record the inconsistency and fall back to an unknown reason.
        titusRuntime.getCodeInvariants().inconsistent("Pod: %s has unknown phase mapping: %s", podWrapper.getName(), podWrapper.getPodPhase());
        reasonCode = REASON_UNKNOWN;
    }
    return Either.ofValue(TaskStatus.newBuilder().withState(Finished).withReasonCode(effectiveFinalReasonCode(reasonCode)).withReasonMessage(podWrapper.getMessage()).withTimestamp(now).build());
}
Also used : TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState)

Example 20 with TaskState

use of com.netflix.titus.api.jobmanager.model.job.TaskState in project titus-control-plane by Netflix.

the class PodToTaskMapper method handlePodPendingInitializingInContainerWaitingState.

/**
 * Handle pod is assigned to a node but not running yet. The following pre-conditions must exist:
 * <ul>
 *     <li>pod condition 'PodScheduled' is set</li>
 *     <li>nodeName is not empty</li>
 * </ul>
 * Based on container state and reason message the state is classified as 'Launched' or 'StartInitiated':
 * <ul>
 *     <li>if reason != 'TASK_STARTING' set 'Launched'</li>
 *     <li>if reason == 'TASK_STARTING' set 'StartInitiated'</li>
 * </ul>
 * A pod without a container in the waiting state, or without a reason, is classified as 'Launched'.
 * The state update happens only forward. Backward changes (StartInitiated -> Launched) are ignored.
 *
 * @param podWrapper the pod whose container state, reason and message are inspected
 * @return the new task status, or the result of {@code unexpected(...)} when the new state would
 *         precede the current task state
 */
private Either<TaskStatus, String> handlePodPendingInitializingInContainerWaitingState(PodWrapper podWrapper) {
    String reason = podWrapper.getReason();

    TaskState newState;
    // Pod that is being setup should be in 'Waiting' state.
    // inspect pod status reason to differentiate between Launched and StartInitiated (this is not standard Kubernetes)
    // Constant-first comparison: the reason is treated as optional further below, so it must not be dereferenced here.
    if (podWrapper.hasContainerStateWaiting() && TASK_STARTING.equalsIgnoreCase(reason)) {
        newState = StartInitiated;
    } else {
        newState = Launched;
    }

    // Check for races. Do not allow setting back task state.
    if (isBefore(newState, task.getStatus().getState())) {
        return unexpected(String.format("pod in state not consistent with the task state (newState=%s)", newState));
    }

    return Either.ofValue(TaskStatus.newBuilder().withState(newState).withReasonCode(StringExt.isEmpty(reason) ? REASON_POD_SCHEDULED : reason).withReasonMessage(podWrapper.getMessage()).withTimestamp(titusRuntime.getClock().wallTime()).build());
}
Also used : TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState)

Aggregations

TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)22 Task (com.netflix.titus.api.jobmanager.model.job.Task)12 ArrayList (java.util.ArrayList)9 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)7 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)7 ServiceJobExt (com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt)7 Job (com.netflix.titus.api.jobmanager.model.job.Job)6 V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations)6 EntityHolder (com.netflix.titus.common.framework.reconciler.EntityHolder)6 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)6 HashMap (java.util.HashMap)6 List (java.util.List)6 Optional (java.util.Optional)6 Stopwatch (com.google.common.base.Stopwatch)5 JobManagerConstants (com.netflix.titus.api.jobmanager.service.JobManagerConstants)5 JobState (com.netflix.titus.api.jobmanager.model.job.JobState)4 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)4 ChangeAction (com.netflix.titus.common.framework.reconciler.ChangeAction)4 Pair (com.netflix.titus.common.util.tuple.Pair)4 VersionSupplier (com.netflix.titus.master.jobmanager.service.VersionSupplier)4