Search in sources:

Example 1 with PodUpdatedEvent

Use of com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent in the titus-control-plane project by Netflix.

In the class KubeNotificationProcessor, method handlePodUpdatedEvent.

/**
 * Translates a pod notification into a task status update.
 * <p>
 * The pod carried by the event is mapped to a {@link TaskStatus} via {@link PodToTaskMapper}. Nothing is
 * submitted (an empty {@link Mono} is returned) when:
 * <ul>
 *     <li>the pod, its status or its phase is {@code null} ({@code metricsNoChangesApplied} is incremented),</li>
 *     <li>the mapper reports an error ({@code metricsNoChangesApplied} is incremented),</li>
 *     <li>the pre-check {@code updateTaskStatus(...)} call against {@code task} yields no value.</li>
 * </ul>
 * Otherwise the change is handed to {@code v3JobOperations.updateTask}, where {@code updateTaskStatus} is
 * evaluated a second time against the task instance supplied by the job service.
 *
 * @param event pod notification; a node is read from it only for {@link PodUpdatedEvent} and
 *              {@link PodDeletedEvent}, any other event type contributes an empty node
 * @param job   not read by this method
 * @param task  task snapshot used for the status comparison and for the pre-check
 * @return the result of {@code v3JobOperations.updateTask} adapted with {@code ReactorExt.toMono}, or an empty
 * {@link Mono} when no update is submitted
 */
private Mono<Void> handlePodUpdatedEvent(PodEvent event, Job job, Task task) {
    // Basic sanity check: a pod without a status or phase means something is badly wrong with the pod state.
    boolean phaseKnown = event.getPod() != null
            && event.getPod().getStatus() != null
            && event.getPod().getStatus().getPhase() != null;
    if (!phaseKnown) {
        logger.warn("Pod notification with pod without status or phase set: taskId={}, pod={}", task.getId(), event.getPod());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    PodWrapper wrappedPod = new PodWrapper(event.getPod());
    boolean podDeleted = event instanceof PodDeletedEvent;

    // Only update and delete events carry node information.
    Optional<V1Node> nodeOpt = event instanceof PodUpdatedEvent
            ? ((PodUpdatedEvent) event).getNode()
            : (podDeleted ? ((PodDeletedEvent) event).getNode() : Optional.<V1Node>empty());

    PodToTaskMapper mapper = new PodToTaskMapper(wrappedPod, nodeOpt, task, podDeleted, containerResultCodeResolver, titusRuntime);
    Either<TaskStatus, String> mapping = mapper.getNewTaskStatus();
    if (mapping.hasError()) {
        logger.info(mapping.getError());
        metricsNoChangesApplied.increment();
        return Mono.empty();
    }

    TaskStatus mappedStatus = mapping.getValue();
    if (!TaskStatus.areEquivalent(task.getStatus(), mappedStatus)) {
        logger.info("Pod notification changes task status: taskId={}, fromStatus={}, toStatus={}, eventSequenceNumber={}", task.getId(), task.getStatus(), mappedStatus, event.getSequenceNumber());
    } else {
        logger.info("Pod change notification does not change task status: taskId={}, status={}, eventSequenceNumber={}", task.getId(), mappedStatus, event.getSequenceNumber());
    }

    // Early pre-check against the task snapshot held here; if it produces nothing, skip the job service call.
    // The same function runs again inside the change action below, against the task the job service passes in.
    boolean nothingToApply = !updateTaskStatus(wrappedPod, mappedStatus, nodeOpt, task, true).isPresent();
    if (nothingToApply) {
        return Mono.empty();
    }

    String reason = "Pod status updated from kubernetes node (k8phase='" + event.getPod().getStatus().getPhase() + "', taskState=" + task.getStatus().getState() + ")";
    return ReactorExt.toMono(v3JobOperations.updateTask(
            task.getId(),
            current -> updateTaskStatus(wrappedPod, mappedStatus, nodeOpt, current, false),
            V3JobOperations.Trigger.Kube,
            reason,
            KUBE_CALL_METADATA
    ));
}
Also used : Retry(reactor.util.retry.Retry) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) V1PodStatus(io.kubernetes.client.openapi.models.V1PodStatus) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) KubeUtil(com.netflix.titus.master.kubernetes.KubeUtil) TITUS_NODE_DOMAIN(com.netflix.titus.runtime.kubernetes.KubeConstants.TITUS_NODE_DOMAIN) Duration(java.time.Duration) Map(java.util.Map) DirectKubeApiServerIntegrator(com.netflix.titus.master.kubernetes.client.DirectKubeApiServerIntegrator) Either(com.netflix.titus.common.util.tuple.Either) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) PodEvent(com.netflix.titus.master.kubernetes.client.model.PodEvent) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) PodNotFoundEvent(com.netflix.titus.master.kubernetes.client.model.PodNotFoundEvent) Timer(com.netflix.spectator.api.Timer) List(java.util.List) Optional(java.util.Optional) PodWrapper(com.netflix.titus.master.kubernetes.client.model.PodWrapper) Gauge(com.netflix.spectator.api.Gauge) Disposable(reactor.core.Disposable) Stopwatch(com.google.common.base.Stopwatch) PodDeletedEvent(com.netflix.titus.master.kubernetes.client.model.PodDeletedEvent) Counter(com.netflix.spectator.api.Counter) HashMap(java.util.HashMap) MetricConstants(com.netflix.titus.master.MetricConstants) V1Node(io.kubernetes.client.openapi.models.V1Node) Singleton(javax.inject.Singleton) Scheduler(reactor.core.scheduler.Scheduler) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Pair(com.netflix.titus.common.util.tuple.Pair) ContainerResultCodeResolver(com.netflix.titus.master.kubernetes.ContainerResultCodeResolver) Schedulers(reactor.core.scheduler.Schedulers) 
Evaluators.acceptNotNull(com.netflix.titus.common.util.Evaluators.acceptNotNull) KubeJobManagementReconciler(com.netflix.titus.master.kubernetes.controller.KubeJobManagementReconciler) ExecutorService(java.util.concurrent.ExecutorService) ExecutorsExt(com.netflix.titus.common.util.ExecutorsExt) Logger(org.slf4j.Logger) PodUpdatedEvent(com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent) Mono(reactor.core.publisher.Mono) Activator(com.netflix.titus.common.util.guice.annotation.Activator) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) ExecutableStatus(com.netflix.titus.api.jobmanager.model.job.ExecutableStatus) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) PodToTaskMapper(com.netflix.titus.master.kubernetes.PodToTaskMapper) V1ContainerState(io.kubernetes.client.openapi.models.V1ContainerState) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Comparator(java.util.Comparator) Evaluators(com.netflix.titus.common.util.Evaluators) PodToTaskMapper(com.netflix.titus.master.kubernetes.PodToTaskMapper) PodDeletedEvent(com.netflix.titus.master.kubernetes.client.model.PodDeletedEvent) V1Node(io.kubernetes.client.openapi.models.V1Node) PodWrapper(com.netflix.titus.master.kubernetes.client.model.PodWrapper) PodUpdatedEvent(com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus)

Example 2 with PodUpdatedEvent

Use of com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent in the titus-control-plane project by Netflix.

In the class DefaultDirectKubeApiServerIntegrator, method kubeInformerEvents.

/**
 * Exposes pod informer callbacks as a stream of {@link PodEvent}s.
 * <p>
 * When the {@code Flux.create} callback runs, a {@link ResourceEventHandler} is registered with
 * {@code kubeApiFacade.getPodInformer()}. Each callback skips pods for which
 * {@code KubeUtil.isOwnedByKubeScheduler} returns {@code false}; otherwise it updates metrics and the
 * {@code pods} map (keyed by the name of the pod's first container), emits the matching event to the sink and
 * logs it. The elapsed time of every callback is logged in a {@code finally} block, including for skipped pods.
 * <p>
 * The handler is never unregistered: no {@code sink.onCancel} hook is installed.
 *
 * @return a {@link Flux} emitting one {@link PodEvent} per accepted informer notification
 */
private Flux<PodEvent> kubeInformerEvents() {
    return Flux.create(sink -> {
        ResourceEventHandler<V1Pod> informerListener = new ResourceEventHandler<V1Pod>() {

            /**
             * Emits an add event, or an update event if a pod is already stored under the same task id.
             */
            @Override
            public void onAdd(V1Pod pod) {
                Stopwatch timer = Stopwatch.createStarted();
                try {
                    if (KubeUtil.isOwnedByKubeScheduler(pod)) {
                        String taskId = pod.getSpec().getContainers().get(0).getName();
                        V1Pod previous = pods.get(taskId);
                        pods.put(taskId, pod);

                        PodEvent emitted;
                        if (previous == null) {
                            emitted = PodEvent.onAdd(pod);
                            metrics.onAdd(pod);
                        } else {
                            // A pod is already stored under this task id, so report this as an update.
                            emitted = PodEvent.onUpdate(previous, pod, findNode(pod));
                            metrics.onUpdate(pod);
                        }
                        sink.next(emitted);

                        logger.info("Pod Added: pod={}, sequenceNumber={}", formatPodEssentials(pod), emitted.getSequenceNumber());
                        logger.debug("complete pod data: {}", pod);
                    }
                } finally {
                    logger.info("Pod informer onAdd: pod={}, elapsedMs={}", pod.getMetadata().getName(), timer.elapsed().toMillis());
                }
            }

            /**
             * Stores the new pod version and emits an update event built from the informer-provided old/new pair.
             */
            @Override
            public void onUpdate(V1Pod oldPod, V1Pod newPod) {
                Stopwatch timer = Stopwatch.createStarted();
                try {
                    if (KubeUtil.isOwnedByKubeScheduler(newPod)) {
                        metrics.onUpdate(newPod);

                        String taskId = newPod.getSpec().getContainers().get(0).getName();
                        pods.put(taskId, newPod);

                        PodUpdatedEvent emitted = PodEvent.onUpdate(oldPod, newPod, findNode(newPod));
                        sink.next(emitted);

                        logger.info("Pod Updated: old={}, new={}, sequenceNumber={}", formatPodEssentials(oldPod), formatPodEssentials(newPod), emitted.getSequenceNumber());
                        logger.debug("Complete pod data: old={}, new={}", oldPod, newPod);
                    }
                } finally {
                    logger.info("Pod informer onUpdate: pod={}, elapsedMs={}", newPod.getMetadata().getName(), timer.elapsed().toMillis());
                }
            }

            /**
             * Removes the pod from the local map and emits a delete event.
             */
            @Override
            public void onDelete(V1Pod pod, boolean deletedFinalStateUnknown) {
                Stopwatch timer = Stopwatch.createStarted();
                try {
                    if (KubeUtil.isOwnedByKubeScheduler(pod)) {
                        metrics.onDelete(pod);

                        String taskId = pod.getSpec().getContainers().get(0).getName();
                        pods.remove(taskId);

                        PodDeletedEvent emitted = PodEvent.onDelete(pod, deletedFinalStateUnknown, findNode(pod));
                        sink.next(emitted);

                        logger.info("Pod Deleted: {}, deletedFinalStateUnknown={}, sequenceNumber={}", formatPodEssentials(pod), deletedFinalStateUnknown, emitted.getSequenceNumber());
                        logger.debug("complete pod data: {}", pod);
                    }
                } finally {
                    logger.info("Pod informer onDelete: pod={}, elapsedMs={}", pod.getMetadata().getName(), timer.elapsed().toMillis());
                }
            }
        };

        // A listener cannot be removed from the shared informer, so no sink.onCancel cleanup is registered here.
        kubeApiFacade.getPodInformer().addEventHandler(informerListener);
    });
}
Also used : ResourceEventHandler(io.kubernetes.client.informer.ResourceEventHandler) PodDeletedEvent(com.netflix.titus.master.kubernetes.client.model.PodDeletedEvent) Stopwatch(com.google.common.base.Stopwatch) V1Pod(io.kubernetes.client.openapi.models.V1Pod) PodEvent(com.netflix.titus.master.kubernetes.client.model.PodEvent) PodUpdatedEvent(com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent)

Aggregations

Stopwatch (com.google.common.base.Stopwatch)2 PodDeletedEvent (com.netflix.titus.master.kubernetes.client.model.PodDeletedEvent)2 PodEvent (com.netflix.titus.master.kubernetes.client.model.PodEvent)2 PodUpdatedEvent (com.netflix.titus.master.kubernetes.client.model.PodUpdatedEvent)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Counter (com.netflix.spectator.api.Counter)1 Gauge (com.netflix.spectator.api.Gauge)1 Timer (com.netflix.spectator.api.Timer)1 TaskAttributes (com.netflix.titus.api.jobmanager.TaskAttributes)1 ExecutableStatus (com.netflix.titus.api.jobmanager.model.job.ExecutableStatus)1 Job (com.netflix.titus.api.jobmanager.model.job.Job)1 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)1 Task (com.netflix.titus.api.jobmanager.model.job.Task)1 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)1 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)1 V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations)1 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)1 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)1 CollectionsExt (com.netflix.titus.common.util.CollectionsExt)1 Evaluators (com.netflix.titus.common.util.Evaluators)1