use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class GrpcJobReplicatorEventStream method newConnection.
@Override
protected Flux<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>> newConnection() {
return Flux.<ReplicatorEvent<JobSnapshot, JobManagerEvent<?>>>create(sink -> {
CacheUpdater cacheUpdater = new CacheUpdater(jobSnapshotFactory, keepAliveEnabled, titusRuntime);
logger.info("Connecting to the job event stream (filteringCriteria={})...", filteringCriteria);
ConnectableFlux<JobManagerEvent<?>> connectableStream = client.observeJobs(filteringCriteria).publish();
Flux<JobManagerEvent<?>> augmentedStream;
if (configuration.isConnectionTimeoutEnabled()) {
augmentedStream = Flux.merge(connectableStream.take(1).timeout(Duration.ofMillis(configuration.getConnectionTimeoutMs())).ignoreElements().onErrorMap(TimeoutException.class, error -> new TimeoutException(String.format("No event received from stream in %sms", configuration.getConnectionTimeoutMs()))), connectableStream);
} else {
augmentedStream = connectableStream;
}
Disposable disposable = augmentedStream.subscribe(jobEvent -> {
long started = titusRuntime.getClock().wallTime();
try {
cacheUpdater.onEvent(jobEvent).ifPresent(sink::next);
eventProcessingLatencies.recordLevel(titusRuntime.getClock().wallTime() - started);
} catch (Exception e) {
// Throw error to force the cache reconnect.
logger.warn("Unexpected error when handling the job change notification: {}", jobEvent, e);
ExceptionExt.silent(() -> sink.error(e));
}
}, e -> ExceptionExt.silent(() -> sink.error(e)), () -> ExceptionExt.silent(sink::complete));
sink.onDispose(disposable);
connectableStream.connect();
}).doOnSubscribe(subscription -> subscriptionCounter.incrementAndGet()).doFinally(signal -> subscriptionCounter.decrementAndGet());
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class DefaultLoadBalancerServiceTest method newTasksGetRegistered.
@Test
public void newTasksGetRegistered() {
String jobId = UUID.randomUUID().toString();
String taskId = UUID.randomUUID().toString();
String loadBalancerId = "lb-" + UUID.randomUUID().toString();
PublishSubject<JobManagerEvent<?>> taskEvents = PublishSubject.create();
when(client.registerAll(any(), any())).thenReturn(Completable.complete());
when(client.deregisterAll(any(), any())).thenReturn(Completable.complete());
when(v3JobOperations.observeJobs()).thenReturn(taskEvents);
when(v3JobOperations.getTasks(jobId)).thenReturn(Collections.emptyList());
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, jobId);
LoadBalancerConfiguration configuration = LoadBalancerTests.mockConfiguration(MIN_TIME_IN_QUEUE_MS);
DefaultLoadBalancerService service = new DefaultLoadBalancerService(runtime, configuration, client, loadBalancerStore, loadBalancerJobOperations, reconciler, validator, testScheduler);
AssertableSubscriber<Batch<TargetStateBatchable, String>> testSubscriber = service.events().test();
assertTrue(service.addLoadBalancer(jobId, loadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertThat(service.getJobLoadBalancers(jobId).toBlocking().first()).isEqualTo(loadBalancerId);
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
Task launched = ServiceJobTask.newBuilder().withJobId(jobId).withId(taskId).withStatus(TaskStatus.newBuilder().withState(TaskState.Launched).build()).build();
Task startingWithIp = launched.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.StartInitiated).build()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "1.2.3.4")).build();
Task started = startingWithIp.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.Started).build()).build();
LoadBalancerTargetState expectedTarget = new LoadBalancerTargetState(new LoadBalancerTarget(loadBalancerId, started.getId(), "1.2.3.4"), REGISTERED);
// events with no state transition gets ignored
taskEvents.onNext(TaskUpdateEvent.newTask(null, launched, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
// events to !Started states get ignored
taskEvents.onNext(TaskUpdateEvent.taskChange(null, startingWithIp, launched, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
// finally detect the task is UP and gets registered
taskEvents.onNext(TaskUpdateEvent.taskChange(null, started, startingWithIp, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(1);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).contains(expectedTarget);
verify(client).registerAll(eq(loadBalancerId), argThat(set -> set.contains("1.2.3.4")));
verify(client, never()).deregisterAll(eq(loadBalancerId), any());
verifyReconcilerIgnore(loadBalancerId, "1.2.3.4");
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class DefaultLoadBalancerServiceTest method finishedTasksGetDeregistered.
@Test
public void finishedTasksGetDeregistered() {
String jobId = UUID.randomUUID().toString();
String loadBalancerId = "lb-" + UUID.randomUUID().toString();
PublishSubject<JobManagerEvent<?>> taskEvents = PublishSubject.create();
when(client.registerAll(any(), any())).thenReturn(Completable.complete());
when(client.deregisterAll(any(), any())).thenReturn(Completable.complete());
when(v3JobOperations.observeJobs()).thenReturn(taskEvents);
when(v3JobOperations.getTasks(jobId)).thenReturn(Collections.emptyList());
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, jobId);
LoadBalancerConfiguration configuration = LoadBalancerTests.mockConfiguration(MIN_TIME_IN_QUEUE_MS);
DefaultLoadBalancerService service = new DefaultLoadBalancerService(runtime, configuration, client, loadBalancerStore, loadBalancerJobOperations, reconciler, validator, testScheduler);
AssertableSubscriber<Batch<TargetStateBatchable, String>> testSubscriber = service.events().test();
assertTrue(service.addLoadBalancer(jobId, loadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertThat(service.getJobLoadBalancers(jobId).toBlocking().first()).isEqualTo(loadBalancerId);
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
// a task that was prematurely killed before having an IP address associated to it should be ignored
Task noIp = ServiceJobTask.newBuilder().withJobId(jobId).withId(UUID.randomUUID().toString()).withStatus(TaskStatus.newBuilder().withState(TaskState.KillInitiated).build()).build();
Task noIpFinished = noIp.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.Finished).build()).build();
taskEvents.onNext(TaskUpdateEvent.taskChange(null, noIpFinished, noIp, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
// 3 state transitions to 3 different terminal events
Task first = noIp.toBuilder().withId(UUID.randomUUID().toString()).withStatus(TaskStatus.newBuilder().withState(TaskState.Started).build()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "1.1.1.1")).build();
Task firstFinished = first.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.Finished).build()).build();
LoadBalancerTargetState expectedFirstTarget = new LoadBalancerTargetState(new LoadBalancerTarget(loadBalancerId, firstFinished.getId(), "1.1.1.1"), DEREGISTERED);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).doesNotContain(expectedFirstTarget);
taskEvents.onNext(TaskUpdateEvent.taskChange(null, firstFinished, first, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(1);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).contains(expectedFirstTarget);
verify(client, never()).registerAll(eq(loadBalancerId), any());
verify(client).deregisterAll(eq(loadBalancerId), argThat(set -> set.contains("1.1.1.1")));
verifyReconcilerIgnore(loadBalancerId, "1.1.1.1");
Task second = first.toBuilder().withId(UUID.randomUUID().toString()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "2.2.2.2")).build();
Task secondKilling = second.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.KillInitiated).build()).build();
LoadBalancerTargetState expectedSecondTarget = new LoadBalancerTargetState(new LoadBalancerTarget(loadBalancerId, secondKilling.getId(), "2.2.2.2"), DEREGISTERED);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).doesNotContain(expectedSecondTarget);
taskEvents.onNext(TaskUpdateEvent.taskChange(null, secondKilling, second, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(2);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).contains(expectedSecondTarget);
verify(client, never()).registerAll(eq(loadBalancerId), any());
verify(client).deregisterAll(eq(loadBalancerId), argThat(set -> set.contains("2.2.2.2")));
verifyReconcilerIgnore(loadBalancerId, "2.2.2.2");
Task third = first.toBuilder().withId(UUID.randomUUID().toString()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "3.3.3.3")).build();
Task thirdDisconnected = third.toBuilder().withStatus(TaskStatus.newBuilder().withState(TaskState.Disconnected).build()).build();
LoadBalancerTargetState expectedThirdTarget = new LoadBalancerTargetState(new LoadBalancerTarget(loadBalancerId, thirdDisconnected.getId(), "3.3.3.3"), DEREGISTERED);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).doesNotContain(expectedThirdTarget);
taskEvents.onNext(TaskUpdateEvent.taskChange(null, thirdDisconnected, third, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(3);
assertThat(loadBalancerStore.getLoadBalancerTargets(loadBalancerId).collectList().block()).contains(expectedThirdTarget);
verify(client, never()).registerAll(eq(loadBalancerId), any());
verify(client).deregisterAll(eq(loadBalancerId), argThat(set -> set.contains("3.3.3.3")));
verifyReconcilerIgnore(loadBalancerId, "3.3.3.3");
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class DefaultLoadBalancerServiceTest method movedTaskOnlySourceAssociatedWithLoadBalancer.
@Test
public void movedTaskOnlySourceAssociatedWithLoadBalancer() {
String taskId = UUID.randomUUID().toString();
String sourceJobId = UUID.randomUUID().toString();
String targetJobId = UUID.randomUUID().toString();
String sourceLoadBalancerId = "lb-" + UUID.randomUUID().toString();
PublishSubject<JobManagerEvent<?>> taskEvents = PublishSubject.create();
when(client.registerAll(any(), any())).thenReturn(Completable.complete());
when(client.deregisterAll(any(), any())).thenReturn(Completable.complete());
when(v3JobOperations.observeJobs()).thenReturn(taskEvents);
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, sourceJobId);
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, targetJobId);
LoadBalancerConfiguration configuration = LoadBalancerTests.mockConfiguration(MIN_TIME_IN_QUEUE_MS);
DefaultLoadBalancerService service = new DefaultLoadBalancerService(runtime, configuration, client, loadBalancerStore, loadBalancerJobOperations, reconciler, validator, testScheduler);
AssertableSubscriber<Batch<TargetStateBatchable, String>> testSubscriber = service.events().test();
assertTrue(service.addLoadBalancer(sourceJobId, sourceLoadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertThat(service.getJobLoadBalancers(sourceJobId).toBlocking().toIterable()).containsExactlyInAnyOrder(sourceLoadBalancerId);
assertThat(service.getJobLoadBalancers(targetJobId).toBlocking().toIterable()).isEmpty();
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
Task moved = ServiceJobTask.newBuilder().withJobId(targetJobId).withId(taskId).withStatus(TaskStatus.newBuilder().withState(TaskState.Started).build()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "1.2.3.4", TaskAttributes.TASK_ATTRIBUTES_MOVED_FROM_JOB, sourceJobId)).build();
// detect the task is moved and gets deregistered from the source
taskEvents.onNext(TaskUpdateEvent.newTaskFromAnotherJob(null, moved, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(1);
verify(client).deregisterAll(eq(sourceLoadBalancerId), argThat(set -> set.contains("1.2.3.4")));
verify(client, never()).registerAll(any(), any());
verifyReconcilerIgnore(sourceLoadBalancerId, "1.2.3.4");
}
use of com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent in project titus-control-plane by Netflix.
the class DefaultLoadBalancerServiceTest method movedTasks.
@Test
public void movedTasks() {
String taskId = UUID.randomUUID().toString();
String sourceJobId = UUID.randomUUID().toString();
String targetJobId = UUID.randomUUID().toString();
String sourceLoadBalancerId = "lb-" + UUID.randomUUID().toString();
String targetLoadBalancerId = "lb-" + UUID.randomUUID().toString();
String commonLoadBalancerId = "lb-" + UUID.randomUUID().toString();
PublishSubject<JobManagerEvent<?>> taskEvents = PublishSubject.create();
when(client.registerAll(any(), any())).thenReturn(Completable.complete());
when(client.deregisterAll(any(), any())).thenReturn(Completable.complete());
when(v3JobOperations.observeJobs()).thenReturn(taskEvents);
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, sourceJobId);
LoadBalancerTests.applyValidGetJobMock(v3JobOperations, targetJobId);
LoadBalancerConfiguration configuration = LoadBalancerTests.mockConfiguration(MIN_TIME_IN_QUEUE_MS);
DefaultLoadBalancerService service = new DefaultLoadBalancerService(runtime, configuration, client, loadBalancerStore, loadBalancerJobOperations, reconciler, validator, testScheduler);
AssertableSubscriber<Batch<TargetStateBatchable, String>> testSubscriber = service.events().test();
assertTrue(service.addLoadBalancer(sourceJobId, sourceLoadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertTrue(service.addLoadBalancer(sourceJobId, commonLoadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertThat(service.getJobLoadBalancers(sourceJobId).toBlocking().toIterable()).containsExactlyInAnyOrder(sourceLoadBalancerId, commonLoadBalancerId);
assertTrue(service.addLoadBalancer(targetJobId, targetLoadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertTrue(service.addLoadBalancer(targetJobId, commonLoadBalancerId).await(100, TimeUnit.MILLISECONDS));
assertThat(service.getJobLoadBalancers(targetJobId).toBlocking().toIterable()).containsExactlyInAnyOrder(targetLoadBalancerId, commonLoadBalancerId);
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(0);
verify(client, never()).registerAll(any(), any());
verify(client, never()).deregisterAll(any(), any());
verifyNoReconcilerIgnore();
Task moved = ServiceJobTask.newBuilder().withJobId(targetJobId).withId(taskId).withStatus(TaskStatus.newBuilder().withState(TaskState.Started).build()).withTaskContext(CollectionsExt.asMap(TaskAttributes.TASK_ATTRIBUTES_CONTAINER_IP, "1.2.3.4", TaskAttributes.TASK_ATTRIBUTES_MOVED_FROM_JOB, sourceJobId)).build();
// detect the task is moved, gets deregistered from the source and registered on the target
taskEvents.onNext(TaskUpdateEvent.newTaskFromAnotherJob(null, moved, callMetadata));
testScheduler.advanceTimeBy(FLUSH_WAIT_TIME_MS, TimeUnit.MILLISECONDS);
testSubscriber.assertNoErrors().assertValueCount(2);
verify(client).registerAll(eq(targetLoadBalancerId), argThat(set -> set.contains("1.2.3.4")));
verify(client).deregisterAll(eq(sourceLoadBalancerId), argThat(set -> set.contains("1.2.3.4")));
verifyReconcilerIgnore(targetLoadBalancerId, "1.2.3.4");
verifyReconcilerIgnore(sourceLoadBalancerId, "1.2.3.4");
// load balancers associated with both source and target jobs are not changed
verify(client, never()).registerAll(eq(commonLoadBalancerId), any());
verify(client, never()).deregisterAll(eq(commonLoadBalancerId), any());
verifyNoReconcilerIgnore(commonLoadBalancerId);
verifyNoReconcilerIgnore(commonLoadBalancerId);
}
Aggregations