Search in sources:

Example 1 with JobDataReplicator

Use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in the project titus-control-plane by Netflix.

In the class DefaultNodeConditionControllerTest, method checkTasksTerminatedDueToBadNodeConditions:

/**
 * Runs the node condition controller against mocked nodes, jobs and tasks with the bad node
 * condition pattern set to ".*Failure" and task termination enabled, and expects exactly two
 * tasks to be killed. The killed task ids are then cross-checked by
 * {@code verifyTerminatedTasksOnBadNodes}.
 */
@Test
public void checkTasksTerminatedDueToBadNodeConditions() {
    // Fixture data: nodes, jobs built with getJobs(true), and the tasks assigned to those nodes.
    Map<String, TitusNode> nodes = buildNodes();
    List<Job<BatchJobExt>> batchJobs = getJobs(true);
    Map<String, List<Task>> tasksPerJob = buildTasksForJobAndNodeAssignment(new ArrayList<>(nodes.values()), batchJobs);

    // Configuration: terminate tasks on nodes whose condition matches ".*Failure".
    RelocationConfiguration relocationConfig = mock(RelocationConfiguration.class);
    when(relocationConfig.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);
    when(relocationConfig.getBadNodeConditionPattern()).thenReturn(".*Failure");

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.resolve()).thenReturn(nodes);

    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(0L);

    // Job operations return the fixture jobs and, per job id, the fixture tasks.
    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);
    when(jobOperations.getJobs()).thenReturn(new ArrayList<>(batchJobs));
    for (Map.Entry<String, List<Task>> entry : tasksPerJob.entrySet()) {
        when(jobOperations.getTasks(entry.getKey())).thenReturn(entry.getValue());
    }

    // Record the first argument (the task id) of every killTask call instead of killing anything.
    Set<String> killedTaskIds = new HashSet<>();
    JobManagementClient jobClient = mock(JobManagementClient.class);
    when(jobClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        String killedId = invocation.getArgument(0);
        killedTaskIds.add(killedId);
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(relocationConfig, nodeResolver, replicator, jobOperations, jobClient, runtime);
    ExecutionContext context = ExecutionContext.newBuilder().withIteration(ExecutionId.initial()).build();
    StepVerifier.create(controller.handleNodesWithBadCondition(context)).verifyComplete();

    assertThat(killedTaskIds).isNotEmpty();
    assertThat(killedTaskIds).hasSize(2);
    verifyTerminatedTasksOnBadNodes(killedTaskIds, tasksPerJob, nodes);
}
Also used : JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) ExecutionContext(com.netflix.titus.common.framework.scheduler.ExecutionContext) DefaultRegistry(com.netflix.spectator.api.DefaultRegistry) ArrayList(java.util.ArrayList) List(java.util.List) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) Job(com.netflix.titus.api.jobmanager.model.job.Job) RelocationConfiguration(com.netflix.titus.supplementary.relocation.RelocationConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 2 with JobDataReplicator

Use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in the project titus-control-plane by Netflix.

In the class DefaultNodeConditionControllerTest, method noTerminationsOnDataStaleness:

/**
 * Runs the node condition controller with stubbed staleness readings (5 ms for node data,
 * 10 ms for job data) and a stubbed staleness threshold of 8000 ms, and expects that no task
 * is killed.
 * <p>
 * NOTE(review): both stubbed staleness values are below the stubbed threshold, and neither
 * {@code nodeDataResolver.resolve()} nor the job operations are stubbed, so this test may pass
 * without ever reaching a "stale data" branch in the controller. Confirm against
 * DefaultNodeConditionController that these values exercise the intended path.
 */
@Test
public void noTerminationsOnDataStaleness() {
    RelocationConfiguration relocationConfig = mock(RelocationConfiguration.class);
    when(relocationConfig.getDataStalenessThresholdMs()).thenReturn(8000L);
    when(relocationConfig.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);
    when(relocationConfig.getBadNodeConditionPattern()).thenReturn(".*Problem");

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    // Staleness readings reported by the two data sources.
    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.getStalenessMs()).thenReturn(5L);
    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(10L);

    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);

    // Record the first argument (the task id) of every killTask call.
    Set<String> killedTaskIds = new HashSet<>();
    JobManagementClient jobClient = mock(JobManagementClient.class);
    when(jobClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        String killedId = invocation.getArgument(0);
        killedTaskIds.add(killedId);
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(relocationConfig, nodeResolver, replicator, jobOperations, jobClient, runtime);
    ExecutionContext context = ExecutionContext.newBuilder().withIteration(ExecutionId.initial()).build();
    StepVerifier.create(controller.handleNodesWithBadCondition(context)).verifyComplete();

    // Nothing may have been killed.
    assertThat(killedTaskIds).isEmpty();
}
Also used : JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) ExecutionContext(com.netflix.titus.common.framework.scheduler.ExecutionContext) DefaultRegistry(com.netflix.spectator.api.DefaultRegistry) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) RelocationConfiguration(com.netflix.titus.supplementary.relocation.RelocationConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with JobDataReplicator

Use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in the project titus-control-plane by Netflix.

In the class StreamDataReplicatorPerf, method main:

/**
 * Manual driver: builds a JobDataReplicator on top of a mocked JobManagementClient and prints
 * every replicator event to standard output.
 * <p>
 * The stream returned by the stubbed {@code observeJobs} call emits, on subscription, a new-job
 * event followed by a snapshot marker, then a new-task event after a one second interval, and
 * finally fails with a simulated error after a further one second interval. After subscribing,
 * the main thread sleeps for one hour.
 *
 * @param args not used
 * @throws InterruptedException if the main thread is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    TitusRuntime runtime = TitusRuntimes.internal();
    JobManagementClient jobClient = Mockito.mock(JobManagementClient.class);
    JobConnectorConfiguration connectorConfiguration = Mockito.mock(JobConnectorConfiguration.class);

    // Flux.defer builds the events afresh for every subscriber of the stubbed stream.
    Mockito.when(jobClient.observeJobs(ArgumentMatchers.any())).thenAnswer(invocation -> Flux.defer(() -> {
        JobManagerEvent newJobEvent = JobUpdateEvent.newJob(JOB, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        JobManagerEvent newTaskEvent = TaskUpdateEvent.newTask(JOB, TASK, JobManagerConstants.GRPC_REPLICATOR_CALL_METADATA);
        return Flux.just(newJobEvent, JobManagerEvent.snapshotMarker())
                // One second later: a single task event.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).map(tick -> newTaskEvent))
                // One more second later: terminate the stream with an error.
                .concatWith(Flux.interval(Duration.ofSeconds(1)).take(1).flatMap(tick -> Flux.error(new RuntimeException("Simulated error"))));
    }));

    JobDataReplicatorProvider replicatorProvider = new JobDataReplicatorProvider(connectorConfiguration, jobClient, JobSnapshotFactories.newDefault(runtime), runtime);
    JobDataReplicator dataReplicator = replicatorProvider.get();
    dataReplicator.events().subscribe(System.out::println);

    // One hour, in milliseconds.
    Thread.sleep(3_600_000);
}
Also used : JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) Job(com.netflix.titus.api.jobmanager.model.job.Job) ArgumentMatchers(org.mockito.ArgumentMatchers) JobUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent) JobSnapshotFactories(com.netflix.titus.runtime.connector.jobmanager.snapshot.JobSnapshotFactories) Task(com.netflix.titus.api.jobmanager.model.job.Task) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) Mockito(org.mockito.Mockito) Flux(reactor.core.publisher.Flux) TaskUpdateEvent(com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) Duration(java.time.Duration) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) JobDataReplicatorProvider(com.netflix.titus.runtime.connector.jobmanager.replicator.JobDataReplicatorProvider) JobConnectorConfiguration(com.netflix.titus.runtime.connector.jobmanager.JobConnectorConfiguration) JobManagerEvent(com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime)

Example 4 with JobDataReplicator

Use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in the project titus-control-plane by Netflix.

In the class DefaultNodeConditionControllerTest, method badNodeConditionsIgnoredForJobsNotOptingIn:

/**
 * Runs the node condition controller against mocked nodes, jobs and tasks with the bad node
 * condition pattern set to ".*Failure" and task termination enabled, but with jobs built via
 * {@code getJobs(false)}, and expects that no task is killed.
 */
@Test
public void badNodeConditionsIgnoredForJobsNotOptingIn() {
    // Fixture data. Per the original author's note, getJobs(false) yields jobs whose
    // "terminateContainerOnBadAgent" attribute is false.
    Map<String, TitusNode> nodes = buildNodes();
    List<Job<BatchJobExt>> batchJobs = getJobs(false);
    Map<String, List<Task>> tasksPerJob = buildTasksForJobAndNodeAssignment(new ArrayList<>(nodes.values()), batchJobs);

    // Configuration: terminate tasks on nodes whose condition matches ".*Failure".
    RelocationConfiguration relocationConfig = mock(RelocationConfiguration.class);
    when(relocationConfig.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);
    when(relocationConfig.getBadNodeConditionPattern()).thenReturn(".*Failure");

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.resolve()).thenReturn(nodes);

    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(0L);

    // Job operations return the fixture jobs and, per job id, the fixture tasks.
    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);
    when(jobOperations.getJobs()).thenReturn(new ArrayList<>(batchJobs));
    for (Map.Entry<String, List<Task>> entry : tasksPerJob.entrySet()) {
        when(jobOperations.getTasks(entry.getKey())).thenReturn(entry.getValue());
    }

    // Record the first argument (the task id) of every killTask call.
    Set<String> killedTaskIds = new HashSet<>();
    JobManagementClient jobClient = mock(JobManagementClient.class);
    when(jobClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        String killedId = invocation.getArgument(0);
        killedTaskIds.add(killedId);
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(relocationConfig, nodeResolver, replicator, jobOperations, jobClient, runtime);
    ExecutionContext context = ExecutionContext.newBuilder().withIteration(ExecutionId.initial()).build();
    StepVerifier.create(controller.handleNodesWithBadCondition(context)).verifyComplete();

    // None of the jobs' tasks may have been killed.
    assertThat(killedTaskIds).isEmpty();
}
Also used : JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient) NodeDataResolver(com.netflix.titus.supplementary.relocation.connector.NodeDataResolver) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) ExecutionContext(com.netflix.titus.common.framework.scheduler.ExecutionContext) DefaultRegistry(com.netflix.spectator.api.DefaultRegistry) ArrayList(java.util.ArrayList) List(java.util.List) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) Job(com.netflix.titus.api.jobmanager.model.job.Job) RelocationConfiguration(com.netflix.titus.supplementary.relocation.RelocationConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with JobDataReplicator

Use of com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator in the project titus-control-plane by Netflix.

In the class RelocationConnectorStubs, method getApplicationContext:

/**
 * Builds a {@link StaticApplicationContext} with the stub collaborators registered as singleton
 * beans, refreshes it and returns it.
 * <p>
 * Registered bean names: "titusRuntime", "nodeDataResolver", "readOnlyJobOperations",
 * "readOnlyEvictionOperations", "evictionServiceClient", "jobManagementClient",
 * "jobDataReplicator" and "evictionDataReplicator". The job management client and both data
 * replicators are Mockito mocks; the replicators are stubbed to report a staleness of 0 ms.
 *
 * @return the refreshed application context
 */
public ApplicationContext getApplicationContext() {
    // Mocks that exist only inside this context.
    JobManagementClient jobClientMock = mock(JobManagementClient.class);
    JobDataReplicator jobReplicatorMock = mock(JobDataReplicator.class);
    when(jobReplicatorMock.getStalenessMs()).thenReturn(0L);
    EvictionDataReplicator evictionReplicatorMock = mock(EvictionDataReplicator.class);
    when(evictionReplicatorMock.getStalenessMs()).thenReturn(0L);

    StaticApplicationContext applicationContext = new StaticApplicationContext();
    applicationContext.getBeanFactory().registerSingleton("titusRuntime", titusRuntime);
    applicationContext.getBeanFactory().registerSingleton("nodeDataResolver", nodeDataResolver);
    applicationContext.getBeanFactory().registerSingleton("readOnlyJobOperations", jobOperations);
    applicationContext.getBeanFactory().registerSingleton("readOnlyEvictionOperations", evictionComponentStub.getEvictionOperations());
    applicationContext.getBeanFactory().registerSingleton("evictionServiceClient", evictionComponentStub.getEvictionServiceClient());
    applicationContext.getBeanFactory().registerSingleton("jobManagementClient", jobClientMock);
    applicationContext.getBeanFactory().registerSingleton("jobDataReplicator", jobReplicatorMock);
    applicationContext.getBeanFactory().registerSingleton("evictionDataReplicator", evictionReplicatorMock);

    applicationContext.refresh();
    return applicationContext;
}
Also used : JobDataReplicator(com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator) EvictionDataReplicator(com.netflix.titus.runtime.connector.eviction.EvictionDataReplicator) StaticApplicationContext(org.springframework.context.support.StaticApplicationContext) JobManagementClient(com.netflix.titus.runtime.connector.jobmanager.JobManagementClient)

Aggregations

JobDataReplicator (com.netflix.titus.runtime.connector.jobmanager.JobDataReplicator)5 JobManagementClient (com.netflix.titus.runtime.connector.jobmanager.JobManagementClient)5 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)4 DefaultRegistry (com.netflix.spectator.api.DefaultRegistry)3 Job (com.netflix.titus.api.jobmanager.model.job.Job)3 ReadOnlyJobOperations (com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations)3 ExecutionContext (com.netflix.titus.common.framework.scheduler.ExecutionContext)3 RelocationConfiguration (com.netflix.titus.supplementary.relocation.RelocationConfiguration)3 NodeDataResolver (com.netflix.titus.supplementary.relocation.connector.NodeDataResolver)3 HashSet (java.util.HashSet)3 Test (org.junit.Test)3 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)3 TitusNode (com.netflix.titus.supplementary.relocation.connector.TitusNode)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Task (com.netflix.titus.api.jobmanager.model.job.Task)1 JobManagerEvent (com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent)1 JobUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.JobUpdateEvent)1 TaskUpdateEvent (com.netflix.titus.api.jobmanager.model.job.event.TaskUpdateEvent)1 JobManagerConstants (com.netflix.titus.api.jobmanager.service.JobManagerConstants)1