Search in sources :

Example 1 with ReplicatorEvent

Use of com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent in the project titus-control-plane by Netflix.

From the class GrpcJobReplicatorEventStream, method newConnection.

/**
 * Builds a stream of replicator events backed by a fresh subscription to the job event stream.
 *
 * <p>For every subscriber a new {@code CacheUpdater} is created, {@code client.observeJobs(filteringCriteria)}
 * is subscribed to, and each incoming job event is passed through the updater; whatever replicator event
 * the updater returns (if any) is emitted downstream. If the connection timeout is enabled in the
 * configuration, the stream fails with a {@link TimeoutException} when the first event does not arrive
 * within the configured number of milliseconds.
 *
 * <p>{@code subscriptionCounter} is incremented on subscribe and decremented when the stream finishes
 * for any reason.
 *
 * @return a stream of replicator events produced from the job event stream
 */
@Override
protected Flux<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> newConnection() {
    Flux<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> connection = Flux.<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>>create(sink -> {
        CacheUpdater updater = new CacheUpdater(jobSnapshotFactory, keepAliveEnabled, titusRuntime);
        logger.info("Connecting to the job event stream (filteringCriteria={})...", filteringCriteria);

        // Published so that the optional first-event guard and the main pipeline share a single upstream subscription.
        ConnectableFlux<JobManagerEvent<?>> sharedStream = client.observeJobs(filteringCriteria).publish();

        Flux<JobManagerEvent<?>> guardedStream = sharedStream;
        if (configuration.isConnectionTimeoutEnabled()) {
            // The guard emits nothing itself; it only fails if no first event shows up in time.
            guardedStream = Flux.merge(
                    sharedStream
                            .take(1)
                            .timeout(Duration.ofMillis(configuration.getConnectionTimeoutMs()))
                            .ignoreElements()
                            .onErrorMap(
                                    TimeoutException.class,
                                    timeout -> new TimeoutException(String.format("No event received from stream in %sms", configuration.getConnectionTimeoutMs()))
                            ),
                    sharedStream
            );
        }

        Disposable upstreamSubscription = guardedStream.subscribe(
                jobEvent -> {
                    long startTime = titusRuntime.getClock().wallTime();
                    try {
                        cacheUpdater(updater, jobEvent, sink);
                        eventProcessingLatencies.recordLevel(titusRuntime.getClock().wallTime() - startTime);
                    } catch (Exception e) {
                        // Propagate the failure downstream to force the cache reconnect.
                        logger.warn("Unexpected error when handling the job change notification: {}", jobEvent, e);
                        ExceptionExt.silent(() -> sink.error(e));
                    }
                },
                streamError -> ExceptionExt.silent(() -> sink.error(streamError)),
                () -> ExceptionExt.silent(sink::complete)
        );
        sink.onDispose(upstreamSubscription);

        // Connect only after the subscriber above is attached.
        sharedStream.connect();
    });

    return connection
            .doOnSubscribe(ignored -> subscriptionCounter.incrementAndGet())
            .doFinally(ignored -> subscriptionCounter.decrementAndGet());
}

/** Runs one job event through the cache updater and emits the resulting replicator event, if there is one. */
private static void cacheUpdater(CacheUpdater updater,
                                 JobManagerEvent<?> jobEvent,
                                 reactor.core.publisher.FluxSink<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> sink) {
    updater.onEvent(jobEvent).ifPresent(sink::next);
}
Also used : Disposable(reactor.core.Disposable) JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) Disposable(reactor.core.Disposable) Task(com.netflix.titus.api.jobmanager.model.job.Task) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) HashMap(java.util.HashMap) AbstractReplicatorEventStream(com.netflix.titus.runtime.connector.common.replicator.AbstractReplicatorEventStream) Scheduler(reactor.core.scheduler.Scheduler) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) SpectatorExt(com.netflix.titus.common.util.spectator.SpectatorExt) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) ConnectableFlux(reactor.core.publisher.ConnectableFlux) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) Map(java.util.Map) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) ExceptionExt(com.netflix.titus.common.util.ExceptionExt) RemoteJobManagementClientWithKeepAlive(com.netflix.titus.runtime.connector.jobmanager.RemoteJobManagementClientWithKeepAlive) Job(com.netflix.titus.api.jobmanager.model.job.Job) Logger(org.slf4j.Logger) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactory(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactory) Set(java.util.Set) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) JobKeepAliveEvent(com.netflix.titus.api.jobmanager.model.job.event.JobKeepAliveEvent) AtomicLong(java.util.concurrent.atomic.AtomicLong) Flux(reactor.core.publisher.Flux) List(java.util.List) ValueRangeCounter(com.netflix.titus.common.util.spectator.ValueRangeCounter) 
TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) DataReplicatorMetrics(com.netflix.titus.runtime.connector.common.replicator.DataReplicatorMetrics) Optional(java.util.Optional) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) VisibleForTesting(com.google.common.annotations.VisibleForTesting) PolledMeter(com.netflix.spectator.api.patterns.PolledMeter) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) TimeoutException(java.util.concurrent.TimeoutException) TimeoutException(java.util.concurrent.TimeoutException)

Example 2 with ReplicatorEvent

Use of com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent in the project titus-control-plane by Netflix.

From the class GrpcJobReplicatorEventStreamTest, method testCacheTaskMove.

/**
 * Checks the replicated cache while a task is moved from one service job to another.
 *
 * <p>Steps: the initial snapshot has all tasks in {@code Accepted}; the task is moved to {@code Started}
 * and found under the source job; the task is then moved to the target job and events are recorded up to
 * and including the first task update flagged as moved from another job. Exactly three events are expected,
 * each either a job update for the source/target job or the task-move update itself.
 */
@Test
public void testCacheTaskMove() {
    Pair<Job, List<Task>> sourceJobAndTasks = jobServiceStub.createJobAndTasks(SERVICE_JOB);
    Job targetJob = jobServiceStub.createJob(SERVICE_JOB);
    Task task = sourceJobAndTasks.getRight().get(0);
    String sourceJobId = sourceJobAndTasks.getLeft().getId();
    String targetJobId = targetJob.getId();
    List<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> recordedEvents = new ArrayList<>();

    newConnectVerifier()
            // Initial snapshot: every task is still in the Accepted state.
            .assertNext(initial -> assertThat(initial.getSnapshot().getTaskMap().values())
                    .allSatisfy(cachedTask -> assertThat(cachedTask.getStatus().getState()).isEqualTo(TaskState.Accepted)))
            .then(() -> jobServiceStub.moveTaskToState(task, TaskState.Started))
            // After the state change the task is Started and indexed under the source job.
            .assertNext(afterStart -> {
                JobSnapshot snapshot = afterStart.getSnapshot();
                Optional<Pair<Job<?>, Task>> jobAndTask = snapshot.findTaskById(task.getId());
                assertThat(jobAndTask).isPresent();
                assertThat(jobAndTask.get().getRight().getStatus().getState()).isEqualTo(TaskState.Started);
                assertThat(snapshot.getTasks(sourceJobId)).containsKey(task.getId());
            })
            .then(() -> {
                CallMetadata callMetadata = CallMetadata.newBuilder().withCallerId("Test").withCallReason("testing").build();
                jobServiceStub.getJobOperations()
                        .moveServiceTask(sourceJobId, targetJobId, task.getId(), callMetadata)
                        .test()
                        .awaitTerminalEvent()
                        .assertNoErrors();
            })
            .recordWith(() -> recordedEvents)
            // Keep consuming until the task update marked as moved from another job has been seen.
            .thenConsumeWhile(next -> {
                JobManagerEvent<?> trigger = next.getTrigger();
                boolean taskMoveSeen = trigger instanceof TaskUpdateEvent
                        && ((TaskUpdateEvent) trigger).isMovedFromAnotherJob();
                return !taskMoveSeen;
            })
            .thenCancel()
            .verify();

    assertThat(recordedEvents).hasSize(3);
    for (ReplicatorEvent<JobSnapshot, JobManagerEvent<?>> recorded : recordedEvents) {
        JobManagerEvent<?> trigger = recorded.getTrigger();
        if (trigger instanceof JobUpdateEvent) {
            String updatedJobId = ((JobUpdateEvent) trigger).getCurrent().getId();
            assertThat(updatedJobId).isIn(sourceJobId, targetJobId);
        } else if (trigger instanceof TaskUpdateEvent) {
            TaskUpdateEvent taskUpdate = (TaskUpdateEvent) trigger;
            assertThat(taskUpdate.isMovedFromAnotherJob()).isTrue();
            assertThat(taskUpdate.getCurrentJob().getId()).isEqualTo(targetJobId);
            assertThat(taskUpdate.getCurrent().getJobId()).isEqualTo(targetJobId);
            assertThat(taskUpdate.getCurrent().getTaskContext().get(TaskAttributes.TASK_ATTRIBUTES_MOVED_FROM_JOB)).isEqualTo(sourceJobId);
        } else {
            fail("Unexpected event type: %s", trigger);
        }
    }
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) JobDescriptorGenerator(com.netflix.titus.testkit.model.job.JobDescriptorGenerator) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) StepVerifier(reactor.test.StepVerifier) Task(com.netflix.titus.api.jobmanager.model.job.Task) CallMetadataConstants(com.netflix.titus.api.model.callmetadata.CallMetadataConstants) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) TimeoutException(java.util.concurrent.TimeoutException) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) ArrayList(java.util.ArrayList) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) Pair(com.netflix.titus.common.util.tuple.Pair) CacheUpdater(com.netflix.titus.runtime.connector.jobmanager.replicator.GrpcJobReplicatorEventStream.CacheUpdater) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) Duration(java.time.Duration) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) Schedulers(reactor.core.scheduler.Schedulers) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) Before(org.junit.Before) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) Job(com.netflix.titus.api.jobmanager.model.job.Job) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactories(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactories) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) 
TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) Mockito(org.mockito.Mockito) List(java.util.List) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) Assertions.fail(org.assertj.core.api.Assertions.fail) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobComponentStub(com.netflix.titus.testkit.model.job.JobComponentStub) DataReplicatorMetrics(com.netflix.titus.runtime.connector.common.replicator.DataReplicatorMetrics) Optional(java.util.Optional) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Mockito.mock(org.mockito.Mockito.mock) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) Task(com.netflix.titus.api.jobmanager.model.job.Task) Optional(java.util.Optional) ArrayList(java.util.ArrayList) ReplicatorEvent(com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) ArrayList(java.util.ArrayList) List(java.util.List) JobSnapshot(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) Test(org.junit.Test)

Aggregations

Job (com.netflix.titus.api.jobmanager.model.job.Job)2 JobState (com.netflix.titus.api.jobmanager.model.job.JobState)2 Task (com.netflix.titus.api.jobmanager.model.job.Task)2 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)2 JobManagerEvent (com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent)2 JobUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent)2 TaskUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)2 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)2 DataReplicatorMetrics (com.netflix.titus.runtime.connector.common.replicator.DataReplicatorMetrics)2 ReplicatorEvent (com.netflix.titus.runtime.connector.common.replicator.ReplicatorEvent)2 JobConnectorConfiguration (com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration)2 JobManagementClient (com.netflix.titus.runtime.connector.jobmanager.JobManagementClient)2 JobSnapshot (com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshot)2 Duration (java.time.Duration)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Optional (java.util.Optional)2 TimeoutException (java.util.concurrent.TimeoutException)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 PolledMeter (com.netflix.spectator.api.patterns.PolledMeter)1