use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in project titus-control-plane by Netflix.
the class DefaultNodeConditionControllerTest method checkTasksTerminatedDueToBadNodeConditions.
/**
 * Runs one controller iteration over the fixture produced by {@code buildNodes()} and
 * {@code getJobs(true)} and expects exactly two distinct task ids to be passed to
 * {@code JobManagementClient.killTask}. Which tasks those must be is checked by
 * {@code verifyTerminatedTasksOnBadNodes}.
 */
@Test
public void checkTasksTerminatedDueToBadNodeConditions() {
    // Fixture: nodes, jobs, and each job's tasks assigned across those nodes.
    Map<String, TitusNode> nodes = buildNodes();
    List<Job<BatchJobExt>> batchJobs = getJobs(true);
    Map<String, List<Task>> tasksByJobId = buildTasksForJobAndNodeAssignment(new ArrayList<>(nodes.values()), batchJobs);

    // Data sources: the node resolver serves the fixture nodes, the job replicator reports zero staleness.
    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.resolve()).thenReturn(nodes);
    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(0L);

    // Job operations serve the fixture jobs and, per job id, that job's tasks.
    ReadOnlyJobOperations jobOps = mock(ReadOnlyJobOperations.class);
    when(jobOps.getJobs()).thenReturn(new ArrayList<>(batchJobs));
    for (Map.Entry<String, List<Task>> entry : tasksByJobId.entrySet()) {
        when(jobOps.getTasks(entry.getKey())).thenReturn(entry.getValue());
    }

    // Termination is enabled; the bad-condition pattern matches any condition ending in "Failure".
    RelocationConfiguration config = mock(RelocationConfiguration.class);
    when(config.getBadNodeConditionPattern()).thenReturn(".*Failure");
    when(config.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    // Record the first argument (the task id) of every killTask call.
    Set<String> killedTaskIds = new HashSet<>();
    JobManagementClient killClient = mock(JobManagementClient.class);
    when(killClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(call -> {
        killedTaskIds.add(call.<String>getArgument(0));
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(
            config, nodeResolver, replicator, jobOps, killClient, runtime);
    ExecutionContext firstIteration = ExecutionContext.newBuilder()
            .withIteration(ExecutionId.initial())
            .build();

    StepVerifier.create(controller.handleNodesWithBadCondition(firstIteration)).verifyComplete();

    assertThat(killedTaskIds).isNotEmpty();
    assertThat(killedTaskIds.size()).isEqualTo(2);
    verifyTerminatedTasksOnBadNodes(killedTaskIds, tasksByJobId, nodes);
}
use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in project titus-control-plane by Netflix.
the class DefaultNodeConditionControllerTest method noTerminationsOnDataStaleness.
/**
 * Expects that no task is passed to {@code JobManagementClient.killTask} when the stubbed
 * data staleness is above the configured staleness threshold.
 *
 * <p>The threshold (8 ms) sits between the node resolver staleness (5 ms) and the job
 * replicator staleness (10 ms), so at least one data source is staler than the threshold.
 * NOTE(review): this assumes the controller skips the iteration when staleness exceeds
 * {@code getDataStalenessThresholdMs()} - confirm against DefaultNodeConditionController.
 */
@Test
public void noTerminationsOnDataStaleness() {
    TitusRuntime titusRuntime = mock(TitusRuntime.class);
    when(titusRuntime.getRegistry()).thenReturn(new DefaultRegistry());

    RelocationConfiguration configuration = mock(RelocationConfiguration.class);
    when(configuration.getBadNodeConditionPattern()).thenReturn(".*Problem");
    when(configuration.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);
    // The threshold must be below the stubbed staleness values. A much larger value
    // (such as 8000) leaves both sources looking fresh and the scenario untested.
    when(configuration.getDataStalenessThresholdMs()).thenReturn(8L);

    NodeDataResolver nodeDataResolver = mock(NodeDataResolver.class);
    when(nodeDataResolver.getStalenessMs()).thenReturn(5L);
    JobDataReplicator jobDataReplicator = mock(JobDataReplicator.class);
    when(jobDataReplicator.getStalenessMs()).thenReturn(10L);

    // Job and node data are intentionally left unstubbed.
    ReadOnlyJobOperations readOnlyJobOperations = mock(ReadOnlyJobOperations.class);

    // Record the first argument (the task id) of every killTask call.
    JobManagementClient jobManagementClient = mock(JobManagementClient.class);
    Set<String> terminatedTaskIds = new HashSet<>();
    when(jobManagementClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        String taskIdToBeTerminated = invocation.getArgument(0);
        terminatedTaskIds.add(taskIdToBeTerminated);
        return Mono.empty();
    });

    DefaultNodeConditionController nodeConditionCtrl = new DefaultNodeConditionController(configuration, nodeDataResolver, jobDataReplicator, readOnlyJobOperations, jobManagementClient, titusRuntime);
    ExecutionContext executionContext = ExecutionContext.newBuilder().withIteration(ExecutionId.initial()).build();

    StepVerifier.create(nodeConditionCtrl.handleNodesWithBadCondition(executionContext)).verifyComplete();

    // No tasks terminated
    assertThat(terminatedTaskIds).isEmpty();
}
use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in project titus-control-plane by Netflix.
the class StreamDataReplicatorPerf method main.
/**
 * Manual harness: builds a {@code JobDataReplicator} on top of a mocked
 * {@code JobManagementClient} and prints every replicator event to standard output.
 *
 * <p>Each {@code observeJobs} call on the mock returns a deferred stream that emits a
 * new-job event and a snapshot marker, then a new-task event from a one-second interval,
 * and finally fails with a simulated error from a second one-second interval.
 *
 * @param args unused
 * @throws InterruptedException if the main thread is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    TitusRuntime runtime = TitusRuntimes.internal();
    JobConnectorConfiguration connectorConfiguration = Mockito.mock(JobConnectorConfiguration.class);
    JobManagementClient jobClient = Mockito.mock(JobManagementClient.class);

    Mockito.when(jobClient.observeJobs(ArgumentMatchers.any())).thenAnswer(call -> Flux.defer(() -> {
        JobManagerEvent newJobEvent = JobUpdateEvent.newJob(JOB, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        JobManagerEvent newTaskEvent = TaskUpdateEvent.newTask(JOB, TASK, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        return Flux.just(newJobEvent, JobManagerEvent.snapshotMarker())
                // One tick of a one-second interval, mapped to the task event.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).map(ignored -> newTaskEvent))
                // One tick of a further one-second interval, turned into an error.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).flatMap(ignored -> Flux.error(new RuntimeException("Simulated error"))));
    }));

    JobDataReplicator dataReplicator = new JobDataReplicatorProvider(
            connectorConfiguration, jobClient, JobSnapshotFactories.newDefault(runtime), runtime).get();
    dataReplicator.events().subscribe(System.out::println);

    // Keep the main thread alive for an hour while events are printed.
    Thread.sleep(3600_000);
}
use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in project titus-control-plane by Netflix.
the class DefaultNodeConditionControllerTest method badNodeConditionsIgnoredForJobsNotOptingIn.
/**
 * Runs one controller iteration over the fixture produced by {@code buildNodes()} and
 * {@code getJobs(false)} and expects that no task id is passed to
 * {@code JobManagementClient.killTask}.
 */
@Test
public void badNodeConditionsIgnoredForJobsNotOptingIn() {
    // Fixture: nodes, jobs, and each job's tasks assigned across those nodes.
    // Presumably getJobs(false) builds jobs with "terminateContainerOnBadAgent" = False - verify against getJobs.
    Map<String, TitusNode> nodes = buildNodes();
    List<Job<BatchJobExt>> batchJobs = getJobs(false);
    Map<String, List<Task>> tasksByJobId = buildTasksForJobAndNodeAssignment(new ArrayList<>(nodes.values()), batchJobs);

    // Data sources: the node resolver serves the fixture nodes, the job replicator reports zero staleness.
    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.resolve()).thenReturn(nodes);
    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(0L);

    // Job operations serve the fixture jobs and, per job id, that job's tasks.
    ReadOnlyJobOperations jobOps = mock(ReadOnlyJobOperations.class);
    when(jobOps.getJobs()).thenReturn(new ArrayList<>(batchJobs));
    for (Map.Entry<String, List<Task>> entry : tasksByJobId.entrySet()) {
        when(jobOps.getTasks(entry.getKey())).thenReturn(entry.getValue());
    }

    // Termination is enabled; the bad-condition pattern matches any condition ending in "Failure".
    RelocationConfiguration config = mock(RelocationConfiguration.class);
    when(config.getBadNodeConditionPattern()).thenReturn(".*Failure");
    when(config.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    // Record the first argument (the task id) of every killTask call.
    Set<String> killedTaskIds = new HashSet<>();
    JobManagementClient killClient = mock(JobManagementClient.class);
    when(killClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(call -> {
        killedTaskIds.add(call.<String>getArgument(0));
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(
            config, nodeResolver, replicator, jobOps, killClient, runtime);
    ExecutionContext firstIteration = ExecutionContext.newBuilder()
            .withIteration(ExecutionId.initial())
            .build();

    StepVerifier.create(controller.handleNodesWithBadCondition(firstIteration)).verifyComplete();

    // None of the fixture's tasks may have been killed.
    assertThat(killedTaskIds).isEmpty();
}
use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in project titus-control-plane by Netflix.
the class RelocationConnectorStubs method getApplicationContext.
/**
 * Builds a {@code StaticApplicationContext}, registers this stub's collaborators as
 * singleton beans, refreshes the context, and returns it.
 *
 * <p>The job management client is a plain mock. The job and eviction data replicators are
 * mocks stubbed to report a staleness of zero.
 *
 * @return the refreshed application context
 */
public ApplicationContext getApplicationContext() {
    // Prepare the mocked beans first; they do not depend on the context.
    JobManagementClient jobManagementClientMock = mock(JobManagementClient.class);

    JobDataReplicator jobReplicatorMock = mock(JobDataReplicator.class);
    when(jobReplicatorMock.getStalenessMs()).thenReturn(0L);

    EvictionDataReplicator evictionReplicatorMock = mock(EvictionDataReplicator.class);
    when(evictionReplicatorMock.getStalenessMs()).thenReturn(0L);

    StaticApplicationContext applicationContext = new StaticApplicationContext();

    // Beans backed by this stub's own state.
    applicationContext.getBeanFactory().registerSingleton("titusRuntime", titusRuntime);
    applicationContext.getBeanFactory().registerSingleton("nodeDataResolver", nodeDataResolver);
    applicationContext.getBeanFactory().registerSingleton("readOnlyJobOperations", jobOperations);
    applicationContext.getBeanFactory().registerSingleton("readOnlyEvictionOperations", evictionComponentStub.getEvictionOperations());
    applicationContext.getBeanFactory().registerSingleton("evictionServiceClient", evictionComponentStub.getEvictionServiceClient());

    // Mocked beans.
    applicationContext.getBeanFactory().registerSingleton("jobManagementClient", jobManagementClientMock);
    applicationContext.getBeanFactory().registerSingleton("jobDataReplicator", jobReplicatorMock);
    applicationContext.getBeanFactory().registerSingleton("evictionDataReplicator", evictionReplicatorMock);

    applicationContext.refresh();
    return applicationContext;
}
Aggregations