Use of com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent in the titus-control-plane project by Netflix.
Class ObserveJobsCommand, method executeOnce.
/**
 * Subscribes to the given event stream once and blocks the calling thread until the stream
 * terminates (error or completion) or, when {@code snapshotOnly} is set, until the snapshot
 * marker is received. The subscription is always disposed before this method returns,
 * including when the wait is interrupted.
 *
 * @param events       event stream to observe
 * @param metrics      recorder invoked for every job and task update; it is told whether the
 *                     snapshot marker has not been seen yet and may return a propagation trace
 * @param printLatency when {@code true}, log the propagation trace returned by {@code metrics}
 * @param printEvents  when {@code true}, log each job, task and keep-alive event
 * @param snapshotOnly when {@code true}, stop waiting as soon as the snapshot marker arrives
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
private void executeOnce(Flux<JobManagerEvent<?>> events, JobEventPropagationMetrics metrics, boolean printLatency, boolean printEvents, boolean snapshotOnly) throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicBoolean snapshotRead = new AtomicBoolean();
    Stopwatch stopwatch = Stopwatch.createStarted();
    Disposable disposable = events.subscribe(next -> {
        // The snapshot marker is a shared sentinel instance, hence the identity comparison.
        if (next == JobManagerEvent.snapshotMarker()) {
            logger.info("Emitted: snapshot marker in {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            snapshotRead.set(true);
            if (snapshotOnly) {
                latch.countDown();
            }
        } else if (next instanceof JobUpdateEvent) {
            Job<?> job = ((JobUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted job update: jobId={}({}), jobState={}, version={}", job.getId(), next.isArchived() ? "archived" : job.getStatus().getState(), job.getStatus(), job.getVersion());
            }
            // Second argument is true for as long as the snapshot marker has not been observed.
            Optional<EventPropagationTrace> trace = metrics.recordJob(((JobUpdateEvent) next).getCurrent(), !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: stages={}", t));
            }
        } else if (next instanceof TaskUpdateEvent) {
            Task task = ((TaskUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted task update: jobId={}({}), taskId={}, taskState={}, version={}", task.getJobId(), next.isArchived() ? "archived" : task.getStatus().getState(), task.getId(), task.getStatus(), task.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordTask(((TaskUpdateEvent) next).getCurrent(), !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: {}", t));
            }
        } else if (next instanceof JobKeepAliveEvent) {
            if (printEvents) {
                // Parameterized logging: same output, no eager string concatenation.
                logger.info("Keep alive response: {}", next);
            }
        } else {
            logger.info("Unrecognized event type: {}", next);
        }
    }, e -> {
        ErrorReports.handleReplyError("Error in the event stream", e);
        latch.countDown();
    }, () -> {
        logger.info("Event stream closed");
        latch.countDown();
    });
    try {
        latch.await();
    } finally {
        // Release the subscription even if the wait above is interrupted.
        disposable.dispose();
    }
}
Use of com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent in the titus-control-plane project by Netflix.
Class LocalCacheQueryProcessor, method observeJobs.
/**
 * Builds an observable stream of job change notifications for the given query.
 * <p>
 * For each subscription the first replicator event triggers emission of the full snapshot
 * (followed by that event's own notification, if any). Afterwards, a snapshot marker or a
 * keep-alive arriving while staleness exceeds the configured limit terminates the stream with
 * {@code ABORTED}; other keep-alives are dropped, and remaining events are forwarded when
 * {@code toObserveJobsEvent} yields a notification for them.
 *
 * @param query the observe-jobs query providing the filtering criteria and field lists
 * @return an observable of job change notifications
 */
public Observable<JobChangeNotification> observeJobs(ObserveJobsQuery query) {
    JobQueryCriteria<TaskStatus.TaskState, JobDescriptor.JobSpecCase> criteria = toJobQueryCriteria(query);
    V3JobQueryCriteriaEvaluator jobsPredicate = new V3JobQueryCriteriaEvaluator(criteria, titusRuntime);
    V3TaskQueryCriteriaEvaluator tasksPredicate = new V3TaskQueryCriteriaEvaluator(criteria, titusRuntime);
    Set<String> jobFields = newFieldsFilter(query.getJobFieldsList(), JOB_MINIMUM_FIELD_SET);
    Set<String> taskFields = newFieldsFilter(query.getTaskFieldsList(), TASK_MINIMUM_FIELD_SET);

    Flux<JobChangeNotification> eventStream = Flux.defer(() -> {
        // Per-subscription flag: true until the initial snapshot has been emitted.
        AtomicBoolean snapshotPending = new AtomicBoolean(true);
        return jobDataReplicator.events()
                .subscribeOn(scheduler)
                .publishOn(scheduler)
                .flatMap(event -> {
                    JobManagerEvent<?> change = event.getRight();
                    long now = titusRuntime.getClock().wallTime();
                    JobSnapshot cache = event.getLeft();
                    Optional<JobChangeNotification> notification = toObserveJobsEvent(cache, change, now, jobsPredicate, tasksPredicate, jobFields, taskFields);

                    // The very first event is preceded by the full snapshot.
                    if (snapshotPending.getAndSet(false)) {
                        List<JobChangeNotification> initial = buildSnapshot(cache, now, jobsPredicate, tasksPredicate, jobFields, taskFields);
                        notification.ifPresent(initial::add);
                        return Flux.fromIterable(initial);
                    }

                    // A snapshot marker after the first event terminates this stream with ABORTED.
                    if (change == JobManagerEvent.snapshotMarker()) {
                        return Mono.error(new StatusRuntimeException(Status.ABORTED.augmentDescription("Downstream event stream reconnected.")));
                    }

                    // Keep-alive events are never forwarded; they only drive the staleness check.
                    if (change instanceof JobKeepAliveEvent) {
                        long stalenessMs = jobDataReplicator.getStalenessMs();
                        long stalenessLimitMs = configuration.getObserveJobsStalenessDisconnectMs();
                        if (stalenessMs > stalenessLimitMs) {
                            rejectedByStalenessTooHighMetric.increment();
                            return Mono.error(new StatusRuntimeException(Status.ABORTED.augmentDescription("Data staleness in the event stream is too high. Most likely caused by connectivity issue to the downstream server.")));
                        }
                        return Mono.empty();
                    }

                    // Regular event: forward it if a notification was produced, otherwise emit nothing.
                    return notification.map(Flux::just).orElseGet(Flux::empty);
                });
    });

    return ReactorExt.toObservable(eventStream);
}
Aggregations