use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class ObserveJobsCommand method execute.
/**
 * Observes job management events according to the command line options and logs them.
 *
 * <p>Options read from the CLI: {@code k} (keep-alive interval in ms, used only when positive),
 * {@code j}/{@code t} (comma separated job/task field filters), {@code l} (print latency data),
 * {@code n} (do not print events), {@code s} (stop after the snapshot) and {@code i} (observe a single job).
 *
 * <p>Unless {@code s} is set, the stream is re-subscribed in a loop every time it terminates.
 *
 * @param context command context providing the parsed CLI and the job management clients
 * @throws Exception propagated from the underlying helpers
 */
@Override
public void execute(CommandContext context) throws Exception {
    long keepAliveMs = -1;
    if (context.getCLI().hasOption('k')) {
        keepAliveMs = Long.parseLong(context.getCLI().getOptionValue('k'));
    }

    RemoteJobManagementClient service;
    if (keepAliveMs > 0) {
        service = context.getJobManagementClientWithKeepAlive(keepAliveMs);
    } else {
        service = context.getJobManagementClient();
    }

    Set<String> jobFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('j'));
    Set<String> taskFields = StringExt.splitByCommaIntoSet(context.getCLI().getOptionValue('t'));
    boolean printLatency = context.getCLI().hasOption('l');
    boolean printEvents = !context.getCLI().hasOption('n');
    boolean snapshotOnly = context.getCLI().hasOption('s');
    JobEventPropagationMetrics metrics = JobEventPropagationMetrics.newExternalClientMetrics("cli", context.getTitusRuntime());

    Flux<JobManagerEvent<?>> events;
    if (context.getCLI().hasOption('i')) {
        // Single job stream.
        events = service.observeJob(context.getCLI().getOptionValue('i'));
    } else if (!jobFields.isEmpty() || !taskFields.isEmpty()) {
        // Special case. Fields filtering cannot be used with RemoteJobManagementClient which converts data to
        // the core model. We have to use GRPC directly.
        executeWithFiltering(context, jobFields, taskFields, printEvents, snapshotOnly);
        return;
    } else {
        // All jobs, no filtering.
        events = service.observeJobs(Collections.emptyMap());
    }

    // Always connect at least once; keep reconnecting unless only the snapshot was requested.
    do {
        logger.info("Establishing a new connection to the job event stream endpoint...");
        executeOnce(events, metrics, printLatency, printEvents, snapshotOnly);
    } while (!snapshotOnly);
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class ObserveJobsCommand method executeOnce.
/**
 * Subscribes once to the event stream and blocks the calling thread until the stream terminates
 * (with an error or completion), or, when {@code snapshotOnly} is set, until the snapshot marker is seen.
 * The subscription is always disposed before returning, including when the wait is interrupted.
 *
 * @param events        event stream to subscribe to
 * @param metrics       propagation metrics recorder fed with every job and task update
 * @param printLatency  when true, logs the propagation trace returned by the metrics recorder (if any)
 * @param printEvents   when true, logs every job, task and keep-alive event
 * @param snapshotOnly  when true, returns as soon as the snapshot marker is received
 * @throws InterruptedException if the calling thread is interrupted while waiting for the stream
 */
private void executeOnce(Flux<JobManagerEvent<?>> events, JobEventPropagationMetrics metrics, boolean printLatency, boolean printEvents, boolean snapshotOnly) throws InterruptedException {
    CountDownLatch latch = new CountDownLatch(1);
    // Flipped once the snapshot marker is seen; updates before it are reported to metrics as snapshot data.
    AtomicBoolean snapshotRead = new AtomicBoolean();
    Stopwatch stopwatch = Stopwatch.createStarted();
    Disposable disposable = events.subscribe(next -> {
        if (next == JobManagerEvent.snapshotMarker()) {
            logger.info("Emitted: snapshot marker in {}ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            snapshotRead.set(true);
            if (snapshotOnly) {
                latch.countDown();
            }
        } else if (next instanceof JobUpdateEvent) {
            Job<?> job = ((JobUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted job update: jobId={}({}), jobState={}, version={}", job.getId(), next.isArchived() ? "archived" : job.getStatus().getState(), job.getStatus(), job.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordJob(job, !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: stages={}", t));
            }
        } else if (next instanceof TaskUpdateEvent) {
            Task task = ((TaskUpdateEvent) next).getCurrent();
            if (printEvents) {
                logger.info("Emitted task update: jobId={}({}), taskId={}, taskState={}, version={}", task.getJobId(), next.isArchived() ? "archived" : task.getStatus().getState(), task.getId(), task.getStatus(), task.getVersion());
            }
            Optional<EventPropagationTrace> trace = metrics.recordTask(task, !snapshotRead.get());
            if (printLatency) {
                trace.ifPresent(t -> logger.info("Event propagation data: {}", t));
            }
        } else if (next instanceof JobKeepAliveEvent) {
            if (printEvents) {
                logger.info("Keep alive response: {}", next);
            }
        } else {
            logger.info("Unrecognized event type: {}", next);
        }
    }, e -> {
        ErrorReports.handleReplyError("Error in the event stream", e);
        latch.countDown();
    }, () -> {
        logger.info("Event stream closed");
        latch.countDown();
    });
    try {
        latch.await();
    } finally {
        // Dispose even if await() is interrupted, otherwise the subscription is left running.
        disposable.dispose();
    }
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class JobUtil method loadActiveJobsAndTasks.
/**
 * Reads the job event stream up to the snapshot marker and collects the jobs in the
 * {@code Accepted} state together with their tasks.
 *
 * <p>Task events are recorded only for jobs already present in the active job map at the time the
 * task event is read.
 *
 * @param context command context providing the job management client
 * @return pair of (job id -> job) and (job id -> (task id -> task))
 */
public static Pair<Map<String, Job>, Map<String, Map<String, Task>>> loadActiveJobsAndTasks(CommandContext context) {
    Map<String, Job> activeJobs = new HashMap<>();
    Map<String, Map<String, Task>> activeTasks = new HashMap<>();
    // takeUntil completes the stream right after the snapshot marker and cancels the upstream
    // subscription. Breaking out of a blocking iterator alone would leave the subscription open.
    Iterator<JobManagerEvent<?>> it = context.getJobManagementClient()
            .observeJobs(Collections.emptyMap())
            .takeUntil(event -> event.equals(JobManagerEvent.snapshotMarker()))
            .toIterable()
            .iterator();
    while (it.hasNext()) {
        JobManagerEvent<?> event = it.next();
        if (event instanceof JobUpdateEvent) {
            JobUpdateEvent je = (JobUpdateEvent) event;
            Job job = je.getCurrent();
            if (job.getStatus().getState() == JobState.Accepted) {
                activeJobs.put(job.getId(), job);
            }
        } else if (event instanceof TaskUpdateEvent) {
            TaskUpdateEvent te = (TaskUpdateEvent) event;
            Task task = te.getCurrent();
            if (activeJobs.containsKey(task.getJobId())) {
                activeTasks.computeIfAbsent(task.getJobId(), j -> new HashMap<>()).put(task.getId(), task);
            }
        }
        // The snapshot marker needs no handling here: it is the last element the bounded stream emits.
    }
    return Pair.of(activeJobs, activeTasks);
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class StreamDataReplicatorPerf method main.
/**
 * Manual harness: wires a job data replicator to a mocked client whose event stream emits a job
 * snapshot, then one task update after a second, then fails after another second. Replicator events
 * are printed to standard output while the main thread sleeps.
 *
 * @param args unused
 * @throws InterruptedException if the main thread is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    TitusRuntime titusRuntime = TitusRuntimes.internal();
    JobManagementClient client = Mockito.mock(JobManagementClient.class);
    JobConnectorConfiguration configuration = Mockito.mock(JobConnectorConfiguration.class);

    // Every subscription replays the same scripted sequence.
    Mockito.when(client.observeJobs(ArgumentMatchers.any())).thenAnswer(invocation -> Flux.defer(() -> {
        JobManagerEvent jobAdded = JobUpdateEvent.newJob(JOB, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        JobManagerEvent taskAdded = TaskUpdateEvent.newTask(JOB, TASK, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);

        Flux<JobManagerEvent> snapshot = Flux.just(jobAdded, JobManagerEvent.snapshotMarker());
        Flux<JobManagerEvent> delayedTaskUpdate = Flux.interval(Duration.ofSeconds(1))
                .take(1)
                .map(tick -> taskAdded);
        Flux<JobManagerEvent> delayedFailure = Flux.interval(Duration.ofSeconds(1))
                .take(1)
                .flatMap(tick -> Flux.error(new RuntimeException("Simulated error")));

        return snapshot.concatWith(delayedTaskUpdate).concatWith(delayedFailure);
    }));

    JobDataReplicatorProvider provider = new JobDataReplicatorProvider(configuration, client, JobSnapshotFactories.newDefault(titusRuntime), titusRuntime);
    JobDataReplicator replicator = provider.get();
    replicator.events().subscribe(event -> System.out.println(event));

    Thread.sleep(3600_000);
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method toJobManagerEvent.
/**
 * Translates a reconciler event into zero or more job manager events.
 *
 * <ul>
 * <li>Checkpoint events become a single keep-alive event when {@code withCheckpoints} is set, otherwise nothing.</li>
 * <li>New model events are delegated to {@code toNewJobUpdateEvent}.</li>
 * <li>Model update events are considered only for the {@code Reference} model; job changes go to
 * {@code toJobUpdateEvent} (preceded by a task archived event for the {@code ReconcilerServiceTaskRemoved}
 * trigger when the change action carries a task), and all other changes go to {@code toTaskUpdateEvent}.</li>
 * <li>Any other event type yields an empty list.</li>
 * </ul>
 *
 * @param jobsPredicate   filter passed through to the job event converters
 * @param tasksPredicate  filter passed through to the task event converter
 * @param withCheckpoints whether checkpoint events should be surfaced as keep-alive events
 * @param event           reconciler event to translate
 * @return the resulting job manager events, possibly empty
 */
private List<JobManagerEvent<?>> toJobManagerEvent(Predicate<Pair<Job<?>, List<Task>>> jobsPredicate, Predicate<Pair<Job<?>, Task>> tasksPredicate, boolean withCheckpoints, JobManagerReconcilerEvent event) {
    if (event instanceof JobCheckpointReconcilerEvent) {
        if (!withCheckpoints) {
            return Collections.emptyList();
        }
        long timestampNano = ((JobCheckpointReconcilerEvent) event).getTimestampNano();
        return Collections.singletonList(JobManagerEvent.keepAliveEvent(timestampNano));
    }

    if (event instanceof JobNewModelReconcilerEvent) {
        return toNewJobUpdateEvent((JobNewModelReconcilerEvent) event, jobsPredicate);
    }

    if (!(event instanceof JobModelUpdateReconcilerEvent)) {
        return Collections.emptyList();
    }

    JobModelUpdateReconcilerEvent update = (JobModelUpdateReconcilerEvent) event;
    if (update.getModelActionHolder().getModel() != Model.Reference) {
        return Collections.emptyList();
    }

    boolean jobChanged = update.getChangedEntityHolder().getEntity() instanceof Job;
    if (!jobChanged) {
        return toTaskUpdateEvent(update, tasksPredicate);
    }

    // We have to emit for this case both task archived event followed by job update event.
    Task removedTask = null;
    if (update.getChangeAction().getTrigger() == Trigger.ReconcilerServiceTaskRemoved) {
        removedTask = update.getChangeAction().getTask().orElse(null);
    }
    if (removedTask == null) {
        return toJobUpdateEvent(update, jobsPredicate);
    }

    Job<?> owner = update.getJob();
    TaskUpdateEvent archived = TaskUpdateEvent.taskArchived(owner, removedTask, update.getCallMetadata());
    List<JobManagerEvent<?>> result = new ArrayList<>();
    result.add(archived);
    result.addAll(toJobUpdateEvent(update, jobsPredicate));
    return result;
}
Aggregations