Usage of com.netflix.titus.runtime.connector.jobmanager.JobEventPropagationMetrics in the Netflix titus-control-plane project: the execute method of the ObserveJobsCommand class.
@Override
public void execute(CommandContext context) throws Exception {
    // Resolve the keep-alive interval (-k); a non-positive value means "no keep-alive client".
    long keepAliveIntervalMs = -1;
    if (context.getCLI().hasOption('k')) {
        keepAliveIntervalMs = Long.parseLong(context.getCLI().getOptionValue('k'));
    }
    RemoteJobManagementClient service;
    if (keepAliveIntervalMs > 0) {
        service = context.getJobManagementClientWithKeepAlive(keepAliveIntervalMs);
    } else {
        service = context.getJobManagementClient();
    }

    // Remaining CLI switches controlling what is streamed and how it is printed.
    Set<String> jobFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('j'));
    Set<String> taskFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('t'));
    boolean printLatency = context.getCLI().hasOption('l');
    boolean printEvents = !context.getCLI().hasOption('n');
    boolean snapshotOnly = context.getCLI().hasOption('s');

    JobEventPropagationMetrics metrics = JobEventPropagationMetrics.newExternalClientMetrics("cli", context.getTitusRuntime());

    Flux<JobManagerEvent<?>> events;
    if (context.getCLI().hasOption('i')) {
        // Single-job stream.
        events = service.observeJob(context.getCLI().getOptionValue('i'));
    } else if (jobFields.isEmpty() && taskFields.isEmpty()) {
        // All-jobs stream with no filtering.
        events = service.observeJobs(Collections.emptyMap());
    } else {
        // Special case. Fields filtering cannot be used with RemoteJobManagementClient which converts data to
        // the core model. We have to use GRPC directly.
        executeWithFiltering(context, jobFields, taskFields, printEvents, snapshotOnly);
        return;
    }

    // Keep reconnecting until interrupted; a single pass suffices in snapshot-only mode.
    do {
        logger.info("Establishing a new connection to the job event stream endpoint...");
        executeOnce(events, metrics, printLatency, printEvents, snapshotOnly);
    } while (!snapshotOnly);
}
Usage of com.netflix.titus.runtime.connector.jobmanager.JobEventPropagationMetrics in the Netflix titus-control-plane project: the executeOnce method of the ObserveJobsCommand class.
/**
 * Subscribes once to the given job event stream and logs each event until the stream
 * terminates (error or completion). When {@code snapshotOnly} is set, the subscription
 * is released as soon as the snapshot marker is observed.
 *
 * @param events       the event stream to subscribe to
 * @param metrics      sink recording per-event propagation latency
 * @param printLatency when true, log the propagation trace for each job/task event
 * @param printEvents  when true, log a summary line per job/task/keep-alive event
 * @param snapshotOnly when true, stop after the snapshot marker is received
 * @throws InterruptedException if interrupted while waiting for the stream to finish
 */
private void executeOnce(Flux<JobManagerEvent<?>> events, JobEventPropagationMetrics metrics, boolean printLatency, boolean printEvents, boolean snapshotOnly) throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicBoolean snapshotRead = new AtomicBoolean();
    Stopwatch stopwatch = Stopwatch.createStarted();
    Disposable disposable = events.subscribe(
            next -> {
                if (next == JobManagerEvent.snapshotMarker()) {
                    logger.info("Emitted: snapshot marker in {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
                    snapshotRead.set(true);
                    if (snapshotOnly) {
                        latch.countDown();
                    }
                } else if (next instanceof JobUpdateEvent) {
                    Job<?> job = ((JobUpdateEvent) next).getCurrent();
                    if (printEvents) {
                        logger.info("Emitted job update: jobId={}({}), jobState={}, version={}",
                                job.getId(), next.isArchived() ? "archived" : job.getStatus().getState(), job.getStatus(), job.getVersion());
                    }
                    // Events arriving before the snapshot marker are part of the initial snapshot;
                    // reuse the already-extracted job instead of re-casting the event.
                    Optional<EventPropagationTrace> trace = metrics.recordJob(job, !snapshotRead.get());
                    if (printLatency) {
                        trace.ifPresent(t -> logger.info("Event propagation data: stages={}", t));
                    }
                } else if (next instanceof TaskUpdateEvent) {
                    Task task = ((TaskUpdateEvent) next).getCurrent();
                    if (printEvents) {
                        logger.info("Emitted task update: jobId={}({}), taskId={}, taskState={}, version={}",
                                task.getJobId(), next.isArchived() ? "archived" : task.getStatus().getState(), task.getId(), task.getStatus(), task.getVersion());
                    }
                    // Reuse the already-extracted task instead of re-casting the event.
                    Optional<EventPropagationTrace> trace = metrics.recordTask(task, !snapshotRead.get());
                    if (printLatency) {
                        trace.ifPresent(t -> logger.info("Event propagation data: {}", t));
                    }
                } else if (next instanceof JobKeepAliveEvent) {
                    if (printEvents) {
                        // Parameterized logging (was string concatenation) keeps this consistent
                        // with the other log calls and defers toString() until actually logged.
                        logger.info("Keep alive response: {}", next);
                    }
                } else {
                    logger.info("Unrecognized event type: {}", next);
                }
            },
            e -> {
                ErrorReports.handleReplyError("Error in the event stream", e);
                latch.countDown();
            },
            () -> {
                logger.info("Event stream closed");
                latch.countDown();
            }
    );
    // Block the calling thread until the stream ends (or the snapshot completes in
    // snapshot-only mode), then release the subscription.
    latch.await();
    disposable.dispose();
}
Aggregations