Search in sources :

Example 1 with JobUpdateEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent in project titus-control-plane by Netflix.

the class LocalCacheQueryProcessor method toObserveJobsEvent.

/**
 * Translates a job manager event into a gRPC change notification, if the event passes the matching filter.
 * <p>
 * A {@link JobUpdateEvent} is checked with {@code jobsPredicate} against the job paired with the tasks the
 * snapshot holds for that job id. A {@link TaskUpdateEvent} is checked with {@code tasksPredicate} against
 * the (job, task) pair carried by the event. Any other event type produces no notification.
 *
 * @param snapshot       snapshot used to look up the tasks of an updated job
 * @param event          the event to translate
 * @param now            timestamp forwarded to the gRPC converters
 * @param jobsPredicate  filter applied to job updates
 * @param tasksPredicate filter applied to task updates
 * @param jobFields      field selection forwarded to the job converter
 * @param taskFields     field selection forwarded to the task converter
 * @return the notification, or empty when the event is filtered out or of an unhandled type
 */
private Optional<JobChangeNotification> toObserveJobsEvent(JobSnapshot snapshot, JobManagerEvent<?> event, long now, V3JobQueryCriteriaEvaluator jobsPredicate, V3TaskQueryCriteriaEvaluator tasksPredicate, Set<String> jobFields, Set<String> taskFields) {
    if (event instanceof JobUpdateEvent) {
        Job<?> updatedJob = ((JobUpdateEvent) event).getCurrent();
        // Copy the snapshot's task collection so the predicate is handed a standalone list.
        List<com.netflix.titus.api.jobmanager.model.job.Task> jobTasks = new ArrayList<>(snapshot.getTasks(updatedJob.getId()).values());
        if (!jobsPredicate.test(Pair.of(updatedJob, jobTasks))) {
            return Optional.empty();
        }
        return Optional.of(toGrpcJobEvent(updatedJob, now, jobFields));
    }

    if (event instanceof TaskUpdateEvent) {
        TaskUpdateEvent taskUpdate = (TaskUpdateEvent) event;
        Job<?> owningJob = taskUpdate.getCurrentJob();
        com.netflix.titus.api.jobmanager.model.job.Task updatedTask = taskUpdate.getCurrentTask();
        if (!tasksPredicate.test(Pair.of(owningJob, updatedTask))) {
            return Optional.empty();
        }
        return Optional.of(toGrpcTaskEvent(updatedTask, taskUpdate.isMovedFromAnotherJob(), now, taskFields));
    }

    return Optional.empty();
}
Also used : Task(com.netflix.titus.grpc.protogen.Task) ArrayList(java.util.ArrayList) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Example 2 with JobUpdateEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent in project titus-control-plane by Netflix.

the class ObserveJobsCommand method executeOnce.

/**
 * Subscribes to the job event stream once, logs and records the received events, and blocks the calling
 * thread until the stream terminates (with completion or an error) or, when {@code snapshotOnly} is set,
 * until the snapshot marker has been received.
 *
 * @param events       the event stream to observe
 * @param metrics      receives every job and task update; the boolean passed along is {@code true} for as
 *                     long as the snapshot marker has not been seen yet
 * @param printLatency when set, the trace returned by {@code metrics} is logged if present
 * @param printEvents  when set, each job, task, and keep-alive event is logged
 * @param snapshotOnly when set, returns as soon as the snapshot marker arrives
 * @throws InterruptedException if the calling thread is interrupted while waiting; the subscription is
 *                              disposed before the exception propagates
 */
private void executeOnce(Flux<JobManagerEvent<?>> events, JobEventPropagationMetrics metrics, boolean printLatency, boolean printEvents, boolean snapshotOnly) throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicBoolean snapshotRead = new AtomicBoolean();
    Stopwatch stopwatch = Stopwatch.createStarted();
    Disposable disposable = events.subscribe(next -> {
        if (next == JobManagerEvent.snapshotMarker()) {
            logger.info("Emitted: snapshot marker in {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            snapshotRead.set(true);
            if (snapshotOnly) {
                latch.countDown();
            }
        } else if (next instanceof JobUpdateEvent) {
            JobUpdateEvent jobUpdate = (JobUpdateEvent) next;
            Job<?> job = jobUpdate.getCurrent();
            if (printEvents) {
                logger.info("Emitted job update: jobId={}({}), jobState={}, version={}", job.getId(), next.isArchived() ? "archived" : job.getStatus().getState(), job.getStatus(), job.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordJob(jobUpdate.getCurrent(), !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: stages={}", t));
            }
        } else if (next instanceof TaskUpdateEvent) {
            Task task = ((TaskUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted task update: jobId={}({}), taskId={}, taskState={}, version={}", task.getJobId(), next.isArchived() ? "archived" : task.getStatus().getState(), task.getId(), task.getStatus(), task.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordTask(task, !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: {}", t));
            }
        } else if (next instanceof JobKeepAliveEvent) {
            if (printEvents) {
                // Placeholder form, consistent with the other log statements; the rendered text is unchanged.
                logger.info("Keep alive response: {}", next);
            }
        } else {
            logger.info("Unrecognized event type: {}", next);
        }
    }, e -> {
        ErrorReports.handleReplyError("Error in the event stream", e);
        latch.countDown();
    }, () -> {
        logger.info("Event stream closed");
        latch.countDown();
    });
    try {
        latch.await();
    } finally {
        // Dispose even when await() is interrupted, so the subscription is not left running.
        disposable.dispose();
    }
}
Also used : Disposable(reactor.core.Disposable) CommandContext(com.netflix.titus.cli.CommandContext) Disposable(reactor.core.Disposable) Stopwatch(com.google.common.base.Stopwatch) ObserveJobsQuery(com.netflix.titus.grpc.protogen.ObserveJobsQuery) Task(com.netflix.titus.api.jobmanager.model.job.Task) Options(org.apache.commons.cli.Options) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StringExt(com.netflix.titus.common.util.StringExt) CliCommand(com.netflix.titus.cli.CliCommand) JobEventPropagationMetrics(com.netflix.titus.runtime.connector.jobmanager.JobEventPropagationMetrics) Option(org.apache.commons.cli.Option) EventPropagationTrace(com.netflix.titus.common.util.event.EventPropagationTrace) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) Set(java.util.Set) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobKeepAliveEvent(com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) Flux(reactor.core.publisher.Flux) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) JobManagementServiceBlockingStub(com.netflix.titus.grpc.protogen.JobManagementServiceGrpc.JobManagementServiceBlockingStub) Optional(java.util.Optional) ErrorReports(com.netflix.titus.cli.command.ErrorReports) Collections(java.util.Collections) JobChangeNotification(com.netflix.titus.grpc.protogen.JobChangeNotification) RemoteJobManagementClient(com.netflix.titus.runtime.connector.jobmanager.RemoteJobManagementClient) Task(com.netflix.titus.api.jobmanager.model.job.Task) Optional(java.util.Optional) Stopwatch(com.google.common.base.Stopwatch) JobKeepAliveEvent(com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent) 
CountDownLatch(java.util.concurrent.CountDownLatch) EventPropagationTrace(com.netflix.titus.common.util.event.EventPropagationTrace) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Example 3 with JobUpdateEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent in project titus-control-plane by Netflix.

the class JobUtil method loadActiveJobsAndTasks.

/**
 * Reads the job event stream up to the snapshot marker and collects the jobs and tasks seen so far.
 * <p>
 * A job is collected only when its state is {@link JobState#Accepted}. A task is collected only when its
 * job has already been collected, so a task event for a job that was skipped or not yet seen is dropped.
 * Reading stops at the snapshot marker, or earlier if the stream ends before the marker is emitted.
 *
 * @param context provides the job management client whose {@code observeJobs} stream is read
 * @return a pair of (jobs keyed by job id, tasks keyed by job id and then by task id)
 */
public static Pair<Map<String, Job>, Map<String, Map<String, Task>>> loadActiveJobsAndTasks(CommandContext context) {
    Map<String, Job> activeJobs = new HashMap<>();
    Map<String, Map<String, Task>> activeTasks = new HashMap<>();
    // takeUntil passes the marker through and then cancels the upstream. Without it, breaking out of the
    // blocking iterator below would abandon the iterator and leave the observeJobs subscription open.
    Iterator<JobManagerEvent<?>> it = context.getJobManagementClient().observeJobs(Collections.emptyMap())
            .takeUntil(event -> event.equals(JobManagerEvent.snapshotMarker()))
            .toIterable()
            .iterator();
    while (it.hasNext()) {
        JobManagerEvent<?> event = it.next();
        if (event instanceof JobUpdateEvent) {
            Job job = ((JobUpdateEvent) event).getCurrent();
            if (job.getStatus().getState() == JobState.Accepted) {
                activeJobs.put(job.getId(), job);
            }
        } else if (event instanceof TaskUpdateEvent) {
            Task task = ((TaskUpdateEvent) event).getCurrent();
            if (activeJobs.containsKey(task.getJobId())) {
                activeTasks.computeIfAbsent(task.getJobId(), j -> new HashMap<>()).put(task.getId(), task);
            }
        } else if (event.equals(JobManagerEvent.snapshotMarker())) {
            break;
        }
    }
    return Pair.of(activeJobs, activeTasks);
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) HashMap(java.util.HashMap) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) Job(com.netflix.titus.api.jobmanager.model.job.Job) Map(java.util.Map) HashMap(java.util.HashMap) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)

Example 4 with JobUpdateEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent in project titus-control-plane by Netflix.

the class StreamDataReplicatorPerf method main.

/**
 * Manual harness: wires a job data replicator to a mocked job management client and prints every
 * replicator event to standard output for one hour.
 * <p>
 * Each subscription to the mocked {@code observeJobs} stream emits a job event and the snapshot marker,
 * then a task event after a one second interval, and then fails with a simulated error after another
 * one second interval.
 *
 * @param args unused
 * @throws InterruptedException if the main thread is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    TitusRuntime titusRuntime = TitusRuntimes.internal();
    JobManagementClient client = Mockito.mock(JobManagementClient.class);

    Mockito.when(client.observeJobs(ArgumentMatchers.any())).thenAnswer(invocation -> Flux.defer(() -> {
        JobManagerEvent jobAdded = JobUpdateEvent.newJob(JOB, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        JobManagerEvent taskAdded = TaskUpdateEvent.newTask(JOB, TASK, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        return Flux.just(jobAdded, JobManagerEvent.snapshotMarker())
                // Delayed task event.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).map(tick -> taskAdded))
                // Delayed stream failure.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).flatMap(tick -> Flux.error(new RuntimeException("Simulated error"))));
    }));

    JobConnectorConfiguration configuration = Mockito.mock(JobConnectorConfiguration.class);
    JobDataReplicator replicator = new JobDataReplicatorProvider(configuration, client, JobSnapshotFactories.newDefault(titusRuntime), titusRuntime).get();
    replicator.events().subscribe(replicatorEvent -> System.out.println(replicatorEvent));

    // Keep the JVM alive for one hour so the event stream can be watched.
    Thread.sleep(Duration.ofHours(1).toMillis());
}
Also used : JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) Job(com.netflix.titus.api.jobmanager.model.job.Job) ArgumentMatchers(org.mockito.ArgumentMatchers) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactories(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactories) Task(com.netflix.titus.api.jobmanager.model.job.Task) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) Mockito(org.mockito.Mockito) Flux(reactor.core.publisher.Flux) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) Duration(java.time.Duration) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime)

Example 5 with JobUpdateEvent

use of com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent in project titus-control-plane by Netflix.

the class DefaultV3JobOperations method enterActiveMode.

/**
 * Activation hook: creates the reconciliation framework, attaches the transaction logger and the
 * event-stream error gauge, subscribes the job-completion handler to the framework's event stream, and
 * finally starts the framework.
 */
@Activator
public void enterActiveMode() {
    this.reconciliationFramework = jobReconciliationFrameworkFactory.newInstance();

    // BUG: the event stream breaks permanently and cannot be retried.
    // Until the underlying issue is fixed, we at least need to be able to detect when it happens.
    Clock clock = titusRuntime.getClock();
    // NOTE(review): presumably holds the timestamp of the last stream error, written by JobTransactionLogger - confirm.
    AtomicLong lastStreamError = new AtomicLong();
    this.transactionLoggerSubscription = JobTransactionLogger.logEvents(reconciliationFramework, lastStreamError, clock);
    // Gauge value: 0 while the tracked value is not positive, otherwise wall time minus the tracked value.
    PolledMeter.using(titusRuntime.getRegistry())
            .withName(METRIC_EVENT_STREAM_LAST_ERROR)
            .monitorValue(lastStreamError, value -> value.get() <= 0 ? 0 : clock.wallTime() - value.get());

    // Remove finished jobs from the reconciliation framework.
    Observable<JobManagerReconcilerEvent> bufferedEvents = reconciliationFramework.events()
            .onBackpressureBuffer(
                    OBSERVE_JOBS_BACKPRESSURE_BUFFER_SIZE,
                    () -> logger.warn("Overflowed the buffer size: " + OBSERVE_JOBS_BACKPRESSURE_BUFFER_SIZE),
                    BackpressureOverflow.ON_OVERFLOW_ERROR
            )
            .doOnSubscribe(() -> {
                // On subscription, run the completion handler over every entity holder currently in the framework.
                for (EntityHolder holder : reconciliationFramework.orderedView(IndexKind.StatusCreationTime)) {
                    handleJobCompletedEvent(holder);
                }
            });

    this.reconcilerEventSubscription = titusRuntime.persistentStream(bufferedEvents).subscribe(
            event -> {
                // Only model update events carry a changed entity holder to inspect.
                if (event instanceof JobModelUpdateReconcilerEvent) {
                    handleJobCompletedEvent(((JobModelUpdateReconcilerEvent) event).getChangedEntityHolder());
                }
            },
            e -> logger.error("Event stream terminated with an error", e),
            () -> logger.info("Event stream completed")
    );

    // Start the framework only after the subscription above is in place.
    reconciliationFramework.start();
}
Also used : Arrays(java.util.Arrays) JobCompatibility(com.netflix.titus.api.jobmanager.model.job.JobCompatibility) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) LoggerFactory(org.slf4j.LoggerFactory) BasicServiceJobActions(com.netflix.titus.master.jobmanager.service.service.action.BasicServiceJobActions) StringExt(com.netflix.titus.common.util.StringExt) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) PreDestroy(javax.annotation.PreDestroy) FeatureActivationConfiguration(com.netflix.titus.api.FeatureActivationConfiguration) Map(java.util.Map) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) BasicJobActions(com.netflix.titus.master.jobmanager.service.common.action.task.BasicJobActions) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) FunctionExt.alwaysTrue(com.netflix.titus.common.util.FunctionExt.alwaysTrue) JobNewModelReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobModelReconcilerEvent.JobNewModelReconcilerEvent) ImmutableSet(com.google.common.collect.ImmutableSet) Job(com.netflix.titus.api.jobmanager.model.job.Job) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) UUID(java.util.UUID) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) Collectors(java.util.stream.Collectors) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) ProtobufExt(com.netflix.titus.common.util.ProtobufExt) List(java.util.List) 
JobModelUpdateReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobModelReconcilerEvent.JobModelUpdateReconcilerEvent) Stream(java.util.stream.Stream) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) DisruptionBudget(com.netflix.titus.api.jobmanager.model.job.disruptionbudget.DisruptionBudget) ProxyConfiguration(com.netflix.titus.common.util.guice.annotation.ProxyConfiguration) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) JobAttributes(com.netflix.titus.api.jobmanager.JobAttributes) ObservableExt(com.netflix.titus.common.util.rx.ObservableExt) Clock(com.netflix.titus.common.util.time.Clock) Subscription(rx.Subscription) KillInitiatedActions(com.netflix.titus.master.jobmanager.service.common.action.task.KillInitiatedActions) Completable(rx.Completable) JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) EntitySanitizer(com.netflix.titus.common.model.sanitizer.EntitySanitizer) ServiceJobProcesses(com.netflix.titus.api.jobmanager.model.job.ServiceJobProcesses) MoveTaskBetweenJobsAction(com.netflix.titus.master.jobmanager.service.service.action.MoveTaskBetweenJobsAction) ProxyType(com.netflix.titus.common.util.guice.ProxyType) MetricConstants(com.netflix.titus.master.MetricConstants) Singleton(javax.inject.Singleton) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Observable(rx.Observable) Inject(javax.inject.Inject) CallMetadataUtils(com.netflix.titus.runtime.endpoint.metadata.CallMetadataUtils) Pair(com.netflix.titus.common.util.tuple.Pair) Model(com.netflix.titus.common.framework.reconciler.ModelActionHolder.Model) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) Named(javax.inject.Named) 
BackpressureOverflow(rx.BackpressureOverflow) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) JobCheckpointReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobCheckpointReconcilerEvent) Logger(org.slf4j.Logger) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) Mono(reactor.core.publisher.Mono) GrpcJobManagementModelConverters(com.netflix.titus.runtime.endpoint.v3.grpc.GrpcJobManagementModelConverters) ManagementSubsystemInitializer(com.netflix.titus.master.service.management.ManagementSubsystemInitializer) JOB_STRICT_SANITIZER(com.netflix.titus.api.jobmanager.model.job.sanitizer.JobSanitizerBuilder.JOB_STRICT_SANITIZER) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) Activator(com.netflix.titus.common.util.guice.annotation.Activator) AtomicLong(java.util.concurrent.atomic.AtomicLong) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) CapacityAttributes(com.netflix.titus.api.jobmanager.model.job.CapacityAttributes) ReconciliationFramework(com.netflix.titus.common.framework.reconciler.ReconciliationFramework) BasicTaskActions(com.netflix.titus.master.jobmanager.service.common.action.task.BasicTaskActions) JobSubmitLimiter(com.netflix.titus.master.jobmanager.service.limiter.JobSubmitLimiter) PolledMeter(com.netflix.spectator.api.patterns.PolledMeter) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Evaluators(com.netflix.titus.common.util.Evaluators) Collections(java.util.Collections) AtomicLong(java.util.concurrent.atomic.AtomicLong) JobModelUpdateReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobModelReconcilerEvent.JobModelUpdateReconcilerEvent) 
EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) Clock(com.netflix.titus.common.util.time.Clock) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) Activator(com.netflix.titus.common.util.guice.annotation.Activator)

Aggregations

JobUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent)8 TaskUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)8 Job (com.netflix.titus.api.jobmanager.model.job.Job)7 Task (com.netflix.titus.api.jobmanager.model.job.Task)7 JobManagerEvent (com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent)5 List (java.util.List)4 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)3 JobChangeNotification (com.netflix.titus.grpc.protogen.JobChangeNotification)3 ArrayList (java.util.ArrayList)3 Optional (java.util.Optional)3 Test (org.junit.Test)3 TaskAttributes (com.netflix.titus.api.jobmanager.TaskAttributes)2 JobDescriptor (com.netflix.titus.api.jobmanager.model.job.JobDescriptor)2 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)2 JobState (com.netflix.titus.api.jobmanager.model.job.JobState)2 JobStatus (com.netflix.titus.api.jobmanager.model.job.JobStatus)2 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)2 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)2 JobManagerConstants (com.netflix.titus.api.jobmanager.service.JobManagerConstants)2 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)2