Search in sources :

Example 6 with JobManagerEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.

the class ObserveJobsCommand method execute.

/**
 * Observes job manager events and passes them to {@code executeOnce} for processing.
 * <p>
 * Command line options read here: {@code k} (keep-alive interval in milliseconds), {@code i} (id of a
 * single job to observe), {@code j} / {@code t} (comma separated job / task field names), {@code l}
 * (print latency data), {@code n} (suppress event printing) and {@code s} (stop after the snapshot).
 * When {@code i} is absent and any job or task field is given, the work is delegated to
 * {@code executeWithFiltering} instead.
 *
 * @param context command context providing the parsed command line, clients and the Titus runtime
 * @throws Exception propagated from option parsing or from the event processing
 */
@Override
public void execute(CommandContext context) throws Exception {
    // A keep-alive enabled client is used only when a positive interval is given.
    long keepAliveMs = -1;
    if (context.getCLI().hasOption('k')) {
        keepAliveMs = Long.parseLong(context.getCLI().getOptionValue('k'));
    }
    RemoteJobManagementClient service;
    if (keepAliveMs > 0) {
        service = context.getJobManagementClientWithKeepAlive(keepAliveMs);
    } else {
        service = context.getJobManagementClient();
    }

    Set<String> jobFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('j'));
    Set<String> taskFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('t'));
    boolean printLatency = context.getCLI().hasOption('l');
    boolean printEvents = !context.getCLI().hasOption('n');
    boolean snapshotOnly = context.getCLI().hasOption('s');
    JobEventPropagationMetrics metrics = JobEventPropagationMetrics.newExternalClientMetrics("cli", context.getTitusRuntime());

    boolean singleJob = context.getCLI().hasOption('i');
    if (!singleJob && !(jobFields.isEmpty() && taskFields.isEmpty())) {
        // Special case. Fields filtering cannot be used with RemoteJobManagementClient which converts data to
        // the core model. We have to use GRPC directly.
        executeWithFiltering(context, jobFields, taskFields, printEvents, snapshotOnly);
        return;
    }

    Flux<JobManagerEvent<?>> events;
    if (singleJob) {
        events = service.observeJob(context.getCLI().getOptionValue('i'));
    } else {
        events = service.observeJobs(Collections.emptyMap());
    }

    // Re-subscribe each time a pass finishes, unless only the snapshot was requested.
    do {
        logger.info("Establishing a new connection to the job event stream endpoint...");
        executeOnce(events, metrics, printLatency, printEvents, snapshotOnly);
    } while (!snapshotOnly);
}
Also used : JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobEventPropagationMetrics(com.netflix.titus.runtime.connector.jobmanager.JobEventPropagationMetrics) RemoteJobManagementClient(com.netflix.titus.runtime.connector.jobmanager.RemoteJobManagementClient)

Example 7 with JobManagerEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.

the class ObserveJobsCommand method executeOnce.

/**
 * Subscribes once to the given event stream, logs/records each event, and blocks the calling thread
 * until the stream terminates (error or completion) or, when {@code snapshotOnly} is set, until the
 * snapshot marker is received. The subscription is always disposed before returning, including when
 * the wait is interrupted.
 *
 * @param events       event stream to subscribe to
 * @param metrics      collector to which each job and task update is reported
 * @param printLatency when true, the propagation trace returned by {@code metrics} is logged
 * @param printEvents  when true, job, task and keep-alive events are logged
 * @param snapshotOnly when true, the method returns as soon as the snapshot marker is seen
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
private void executeOnce(Flux<JobManagerEvent<?>> events, JobEventPropagationMetrics metrics, boolean printLatency, boolean printEvents, boolean snapshotOnly) throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicBoolean snapshotRead = new AtomicBoolean();
    Stopwatch stopwatch = Stopwatch.createStarted();
    Disposable disposable = events.subscribe(next -> {
        // NOTE(review): identity comparison presumes snapshotMarker() returns a shared instance - confirm.
        if (next == JobManagerEvent.snapshotMarker()) {
            logger.info("Emitted: snapshot marker in {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            snapshotRead.set(true);
            if (snapshotOnly) {
                latch.countDown();
            }
        } else if (next instanceof JobUpdateEvent) {
            Job<?> job = ((JobUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted job update: jobId={}({}), jobState={}, version={}", job.getId(), next.isArchived() ? "archived" : job.getStatus().getState(), job.getStatus(), job.getVersion());
            }
            // The second argument is true for events received before the snapshot marker.
            Optional<EventPropagationTrace> trace = metrics.recordJob(job, !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: stages={}", t));
            }
        } else if (next instanceof TaskUpdateEvent) {
            Task task = ((TaskUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted task update: jobId={}({}), taskId={}, taskState={}, version={}", task.getJobId(), next.isArchived() ? "archived" : task.getStatus().getState(), task.getId(), task.getStatus(), task.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordTask(task, !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: {}", t));
            }
        } else if (next instanceof JobKeepAliveEvent) {
            if (printEvents) {
                logger.info("Keep alive response: " + next);
            }
        } else {
            logger.info("Unrecognized event type: {}", next);
        }
    }, e -> {
        ErrorReports.handleReplyError("Error in the event stream", e);
        latch.countDown();
    }, () -> {
        logger.info("Event stream closed");
        latch.countDown();
    });
    try {
        latch.await();
    } finally {
        // Release the subscription even when await() is interrupted, so the stream is not left open.
        disposable.dispose();
    }
}
Also used : Disposable(reactor.core.Disposable) CommandContext(com.netflix.titus.cli.CommandContext) Disposable(reactor.core.Disposable) Stopwatch(com.google.common.base.Stopwatch) ObserveJobsQuery(com.netflix.titus.grpc.protogen.ObserveJobsQuery) Task(com.netflix.titus.api.jobmanager.model.job.Task) Options(org.apache.commons.cli.Options) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StringExt(com.netflix.titus.common.util.StringExt) CliCommand(com.netflix.titus.cli.CliCommand) JobEventPropagationMetrics(com.netflix.titus.runtime.connector.jobmanager.JobEventPropagationMetrics) Option(org.apache.commons.cli.Option) EventPropagationTrace(com.netflix.titus.common.util.event.EventPropagationTrace) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) Set(java.util.Set) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobKeepAliveEvent(com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) Flux(reactor.core.publisher.Flux) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) JobManagementServiceBlockingStub(com.netflix.titus.grpc.protogen.JobManagementServiceGrpc.JobManagementServiceBlockingStub) Optional(java.util.Optional) ErrorReports(com.netflix.titus.cli.command.ErrorReports) Collections(java.util.Collections) JobChangeNotification(com.netflix.titus.grpc.protogen.JobChangeNotification) RemoteJobManagementClient(com.netflix.titus.runtime.connector.jobmanager.RemoteJobManagementClient) Task(com.netflix.titus.api.jobmanager.model.job.Task) Optional(java.util.Optional) Stopwatch(com.google.common.base.Stopwatch) JobKeepAliveEvent(com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent) 
CountDownLatch(java.util.concurrent.CountDownLatch) EventPropagationTrace(com.netflix.titus.common.util.event.EventPropagationTrace) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Example 8 with JobManagerEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.

the class JobUtil method loadActiveJobsAndTasks.

/**
 * Reads the job event stream up to the snapshot marker and collects the jobs found in the
 * {@code Accepted} state together with the tasks that belong to those jobs.
 * <p>
 * A task is kept only if its job has already been collected at the time the task event is read.
 *
 * @param context command context providing the job management client
 * @return pair of (job id to job) and (job id to (task id to task)) maps
 */
public static Pair<Map<String, Job>, Map<String, Map<String, Task>>> loadActiveJobsAndTasks(CommandContext context) {
    Map<String, Job> activeJobs = new HashMap<>();
    Map<String, Map<String, Task>> activeTasks = new HashMap<>();
    // takeUntil relays the snapshot marker and then completes, cancelling the upstream subscription.
    // Breaking out of a plain toIterable() iterator alone would not cancel the event stream.
    Iterator<JobManagerEvent<?>> it = context.getJobManagementClient()
            .observeJobs(Collections.emptyMap())
            .takeUntil(event -> event.equals(JobManagerEvent.snapshotMarker()))
            .toIterable()
            .iterator();
    while (it.hasNext()) {
        JobManagerEvent<?> event = it.next();
        if (event instanceof JobUpdateEvent) {
            JobUpdateEvent je = (JobUpdateEvent) event;
            Job job = je.getCurrent();
            if (job.getStatus().getState() == JobState.Accepted) {
                activeJobs.put(job.getId(), job);
            }
        } else if (event instanceof TaskUpdateEvent) {
            TaskUpdateEvent te = (TaskUpdateEvent) event;
            Task task = te.getCurrent();
            if (activeJobs.containsKey(task.getJobId())) {
                activeTasks.computeIfAbsent(task.getJobId(), j -> new HashMap<>()).put(task.getId(), task);
            }
        } else if (event.equals(JobManagerEvent.snapshotMarker())) {
            // Snapshot fully read; the bounded stream completes right after this element.
            break;
        }
    }
    return Pair.of(activeJobs, activeTasks);
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) Job(com.netflix.titus.api.jobmanager.model.job.Job) Map(java.util.Map) HashMap(java.util.HashMap) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Example 9 with JobManagerEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.

the class StreamDataReplicatorPerf method main.

/**
 * Manual harness: builds a job data replicator on top of a mocked {@code JobManagementClient} and
 * prints every replicator event to standard output.
 * <p>
 * Each subscription to the mocked {@code observeJobs} stream emits a job update and the snapshot
 * marker, then a task update after one second, and finally fails with a simulated error one second
 * after that. The main thread then sleeps for one hour.
 *
 * @param args ignored
 * @throws InterruptedException if the main thread is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    TitusRuntime runtime = TitusRuntimes.internal();
    JobManagementClient mockedClient = Mockito.mock(JobManagementClient.class);
    JobConnectorConfiguration connectorConfiguration = Mockito.mock(JobConnectorConfiguration.class);

    Mockito.when(mockedClient.observeJobs(ArgumentMatchers.any())).thenAnswer(invocation -> Flux.defer(() -> {
        JobManagerEvent jobAdded = JobUpdateEvent.newJob(JOB, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        JobManagerEvent taskAdded = TaskUpdateEvent.newTask(JOB, TASK, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);

        // Snapshot part of the stream: one job followed by the marker.
        Flux<JobManagerEvent> snapshot = Flux.just(jobAdded, JobManagerEvent.snapshotMarker());
        // Task update delivered one second after the snapshot.
        Flux<JobManagerEvent> delayedTaskUpdate = Flux.interval(Duration.ofSeconds(1)).take(1).map(tick -> taskAdded);
        // Stream failure delivered one second after the task update.
        Flux<JobManagerEvent> delayedFailure = Flux.interval(Duration.ofSeconds(1)).take(1).flatMap(tick -> Flux.error(new RuntimeException("Simulated error")));

        return snapshot.concatWith(delayedTaskUpdate).concatWith(delayedFailure);
    }));

    JobDataReplicatorProvider provider = new JobDataReplicatorProvider(connectorConfiguration, mockedClient, JobSnapshotFactories.newDefault(runtime), runtime);
    JobDataReplicator replicator = provider.get();
    replicator.events().subscribe(System.out::println);

    Thread.sleep(Duration.ofHours(1).toMillis());
}
Also used : JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) Job(com.netflix.titus.api.jobmanager.model.job.Job) ArgumentMatchers(org.mockito.ArgumentMatchers) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactories(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactories) Task(com.netflix.titus.api.jobmanager.model.job.Task) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) Mockito(org.mockito.Mockito) Flux(reactor.core.publisher.Flux) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) Duration(java.time.Duration) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime)

Example 10 with JobManagerEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.

the class DefaultV3JobOperations method toJobManagerEvent.

/**
 * Translates a reconciler event into zero or more job manager events.
 * <ul>
 * <li>Checkpoint events become a single keep-alive event when {@code withCheckpoints} is set, otherwise nothing.</li>
 * <li>New model events are converted by {@code toNewJobUpdateEvent}.</li>
 * <li>Model update events are converted only for the {@code Model.Reference} model: job entity changes
 * go through {@code toJobUpdateEvent} (preceded by a task archived event when the trigger is
 * {@code ReconcilerServiceTaskRemoved} and the change action carries a task), all other changes go
 * through {@code toTaskUpdateEvent}.</li>
 * <li>Any other event yields an empty list.</li>
 * </ul>
 *
 * @param jobsPredicate   predicate forwarded to the job event converters
 * @param tasksPredicate  predicate forwarded to the task event converter
 * @param withCheckpoints whether checkpoint events should be emitted as keep-alive events
 * @param event           reconciler event to translate
 * @return the resulting job manager events, possibly empty
 */
private List<JobManagerEvent<?>> toJobManagerEvent(Predicate<Pair<Job<?>, List<Task>>> jobsPredicate, Predicate<Pair<Job<?>, Task>> tasksPredicate, boolean withCheckpoints, JobManagerReconcilerEvent event) {
    if (event instanceof JobCheckpointReconcilerEvent) {
        if (!withCheckpoints) {
            return Collections.emptyList();
        }
        return Collections.singletonList(JobManagerEvent.keepAliveEvent(((JobCheckpointReconcilerEvent) event).getTimestampNano()));
    }

    if (event instanceof JobNewModelReconcilerEvent) {
        return toNewJobUpdateEvent((JobNewModelReconcilerEvent) event, jobsPredicate);
    }

    if (!(event instanceof JobModelUpdateReconcilerEvent)) {
        return Collections.emptyList();
    }

    JobModelUpdateReconcilerEvent update = (JobModelUpdateReconcilerEvent) event;
    if (update.getModelActionHolder().getModel() != Model.Reference) {
        return Collections.emptyList();
    }

    boolean jobEntityChanged = update.getChangedEntityHolder().getEntity() instanceof Job;
    if (!jobEntityChanged) {
        return toTaskUpdateEvent(update, tasksPredicate);
    }

    // We have to emit for this case both task archived event followed by job update event.
    Task removedTask = null;
    if (update.getChangeAction().getTrigger() == Trigger.ReconcilerServiceTaskRemoved) {
        removedTask = update.getChangeAction().getTask().orElse(null);
    }
    if (removedTask == null) {
        return toJobUpdateEvent(update, jobsPredicate);
    }

    Job<?> owner = update.getJob();
    TaskUpdateEvent taskArchived = TaskUpdateEvent.taskArchived(owner, removedTask, update.getCallMetadata());
    List<JobManagerEvent<?>> result = new ArrayList<>();
    result.add(taskArchived);
    result.addAll(toJobUpdateEvent(update, jobsPredicate));
    return result;
}
Also used : JobCheckpointReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobCheckpointReconcilerEvent) JobModelUpdateReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobModelReconcilerEvent.JobModelUpdateReconcilerEvent) Task(com.netflix.titus.api.jobmanager.model.job.Task) JobNewModelReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobModelReconcilerEvent.JobNewModelReconcilerEvent) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) ArrayList(java.util.ArrayList) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Aggregations

JobManagerEvent (com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent)14 Task (com.netflix.titus.api.jobmanager.model.job.Task)13 TaskUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)12 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)9 ArrayList (java.util.ArrayList)9 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)8 TitusRuntimes (com.netflix.titus.common.runtime.TitusRuntimes)8 Collections (java.util.Collections)8 List (java.util.List)8 Set (java.util.Set)8 TaskAttributes (com.netflix.titus.api.jobmanager.TaskAttributes)7 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)7 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)7 TimeUnit (java.util.concurrent.TimeUnit)7 Assertions.assertThat (org.assertj.core.api.Assertions.assertThat)7 LoadBalancer (com.netflix.titus.api.connector.cloud.LoadBalancer)6 LoadBalancerConnector (com.netflix.titus.api.connector.cloud.LoadBalancerConnector)6 ServiceJobTask (com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)6 JobManagerException (com.netflix.titus.api.jobmanager.service.JobManagerException)6 V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations)6