use of com.netflix.titus.common.runtime.TitusRuntime in project titus-control-plane by Netflix.
the class JobQuotaControllerTest method testSelfManagedJobUsesInternalDisruptionBudget.
/**
 * A job declaring a self-managed disruption policy must be governed by the budget
 * returned from the {@link EffectiveJobDisruptionBudgetResolver}, not by its own descriptor.
 * <p>
 * With the resolver-supplied budget in place the test expects a quota of exactly one:
 * the first consume call is approved, the quota drops to zero, and the second consume
 * call is rejected with a reason naming the hourly percentage rate controller.
 */
@Test
public void testSelfManagedJobUsesInternalDisruptionBudget() {
    // Job descriptor carries the self-managed policy with an unlimited rate.
    Job<BatchJobExt> selfManagedJob = newBatchJob(
            10,
            budget(selfManagedPolicy(1_000), unlimitedRate(), Collections.emptyList())
    );
    com.netflix.titus.api.model.reference.Reference selfManagedJobRef =
            com.netflix.titus.api.model.reference.Reference.job(selfManagedJob.getId());
    scheduleJob(selfManagedJob, 10);

    // The resolver ignores its argument and always hands back the internal budget.
    EffectiveJobDisruptionBudgetResolver internalBudgetResolver = ignored -> budget(
            perTaskRelocationLimitPolicy(100),
            hourlyRatePercentage(5),
            singletonList(officeHourTimeWindow())
    );
    JobQuotaController controller = new JobQuotaController(
            selfManagedJob, jobOperations, internalBudgetResolver, containerHealthService, titusRuntime
    );

    // Initially a single eviction is available.
    assertThat(controller.getQuota(selfManagedJobRef).getQuota()).isEqualTo(1);

    // The first consumption succeeds and exhausts the quota.
    String firstTaskId = jobOperations.getTasks(selfManagedJob.getId()).get(0).getId();
    assertThat(controller.consume(firstTaskId).isApproved()).isTrue();
    assertThat(controller.getQuota(selfManagedJobRef).getQuota()).isEqualTo(0);

    // The second consumption is rejected by the rate controller of the internal budget.
    ConsumptionResult rejected = controller.consume(firstTaskId);
    assertThat(rejected.isApproved()).isFalse();
    assertThat(rejected.getRejectionReason().get()).contains("JobPercentagePerHourRelocationRateController");
}
use of com.netflix.titus.common.runtime.TitusRuntime in project titus-control-plane by Netflix.
the class DefaultDeschedulerServiceTest method verifyRelocationPlan.
/**
 * Builds a {@link DefaultDeschedulerService} around mocked collaborators, creates a
 * two-instance service job with a self-managed disruption budget, and checks the
 * relocation plan produced for one of its tasks on a node that requires relocation.
 *
 * @param relocationDelay relocation delay passed to the job's self-managed policy; the
 *                        expected relocation time is this value plus the advanced clock time
 * @param reasonMessage   reason message the produced relocation plan is expected to carry
 */
private void verifyRelocationPlan(long relocationDelay, String reasonMessage) {
    ReadOnlyJobOperations readOnlyJobOps = mock(ReadOnlyJobOperations.class);
    DefaultDeschedulerService deschedulerService = new DefaultDeschedulerService(
            readOnlyJobOps,
            mock(ReadOnlyEvictionOperations.class),
            new KubernetesNodeDataResolver(configuration, TestDataFactory.mockFabric8IOConnector(), node -> true),
            () -> "foo|bar",
            titusRuntime
    );

    Job<ServiceJobExt> serviceJob = JobGenerator.serviceJobs(
            oneTaskServiceJobDescriptor().but(
                    ofServiceSize(2),
                    withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))
            )
    ).getValue();
    ServiceJobTask serviceTask = JobGenerator.serviceTasks(serviceJob).getValue();
    when(readOnlyJobOps.getJob(serviceJob.getId())).thenReturn(Optional.of(serviceJob));

    TitusNode relocatableNode = TitusNode.newBuilder()
            .withId("node1")
            .withServerGroupId("asg1")
            .withRelocationRequired(true)
            .withBadCondition(false)
            .build();

    // Move the test clock forward so the decision time is a known, non-zero value.
    long clockAdvancedMs = 5_000;
    ((TestClock) titusRuntime.getClock()).advanceTime(Duration.ofMillis(clockAdvancedMs));

    Optional<TaskRelocationPlan> maybePlan =
            deschedulerService.getRelocationPlanForTask(relocatableNode, serviceTask, Collections.emptyMap());
    assertThat(maybePlan).isPresent();

    TaskRelocationPlan plan = maybePlan.get();
    assertThat(plan.getTaskId()).isEqualTo(serviceTask.getId());
    // Relocation time is expected to be the decision clock time plus relocationDelay.
    // (The expected values presume the test clock started at 0 - not visible from here.)
    assertThat(plan.getRelocationTime()).isEqualTo(relocationDelay + clockAdvancedMs);
    assertThat(plan.getDecisionTime()).isEqualTo(clockAdvancedMs);
    assertThat(plan.getReasonMessage()).isEqualTo(reasonMessage);
}
use of com.netflix.titus.common.runtime.TitusRuntime in project titus-control-plane by Netflix.
the class DefaultNodeConditionControllerTest method checkTasksTerminatedDueToBadNodeConditions.
/**
 * Runs one iteration of {@link DefaultNodeConditionController} against mocked nodes, jobs
 * and tasks with task termination enabled, records every task id passed to
 * {@code killTask}, and expects exactly two terminations which are then cross-checked by
 * {@code verifyTerminatedTasksOnBadNodes}.
 */
@Test
public void checkTasksTerminatedDueToBadNodeConditions() {
    // Fixture data: nodes, jobs and the task-to-node assignment.
    Map<String, TitusNode> nodesById = buildNodes();
    List<Job<BatchJobExt>> batchJobs = getJobs(true);
    Map<String, List<Task>> tasksByJobId =
            buildTasksForJobAndNodeAssignment(new ArrayList<>(nodesById.values()), batchJobs);

    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    // Bad-condition pattern and the feature switch enabling terminations.
    RelocationConfiguration relocationConfig = mock(RelocationConfiguration.class);
    when(relocationConfig.getBadNodeConditionPattern()).thenReturn(".*Failure");
    when(relocationConfig.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);

    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.resolve()).thenReturn(nodesById);

    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(0L);

    ReadOnlyJobOperations jobOps = mock(ReadOnlyJobOperations.class);
    when(jobOps.getJobs()).thenReturn(new ArrayList<>(batchJobs));
    for (Map.Entry<String, List<Task>> jobTasks : tasksByJobId.entrySet()) {
        when(jobOps.getTasks(jobTasks.getKey())).thenReturn(jobTasks.getValue());
    }

    // Capture the id of every task the controller asks to kill.
    JobManagementClient managementClient = mock(JobManagementClient.class);
    Set<String> killedTaskIds = new HashSet<>();
    when(managementClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        killedTaskIds.add(invocation.<String>getArgument(0));
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(
            relocationConfig, nodeResolver, replicator, jobOps, managementClient, runtime
    );
    ExecutionContext firstIteration = ExecutionContext.newBuilder()
            .withIteration(ExecutionId.initial())
            .build();

    StepVerifier.create(controller.handleNodesWithBadCondition(firstIteration)).verifyComplete();

    assertThat(killedTaskIds).isNotEmpty().hasSize(2);
    verifyTerminatedTasksOnBadNodes(killedTaskIds, tasksByJobId, nodesById);
}
use of com.netflix.titus.common.runtime.TitusRuntime in project titus-control-plane by Netflix.
the class DefaultNodeConditionControllerTest method noTerminationsOnDataStaleness.
/**
 * Runs one iteration of {@link DefaultNodeConditionController} with staleness values and a
 * staleness threshold stubbed on the mocks, and expects that {@code killTask} is never
 * invoked.
 * <p>
 * NOTE(review): the stubbed staleness values (5 ms for nodes, 10 ms for jobs) are below the
 * stubbed threshold (8000 ms). Confirm against the controller that this scenario really
 * exercises the stale-data path, rather than passing only because no nodes or jobs are
 * stubbed on the mocks.
 */
@Test
public void noTerminationsOnDataStaleness() {
    TitusRuntime runtime = mock(TitusRuntime.class);
    when(runtime.getRegistry()).thenReturn(new DefaultRegistry());

    RelocationConfiguration relocationConfig = mock(RelocationConfiguration.class);
    when(relocationConfig.getBadNodeConditionPattern()).thenReturn(".*Problem");
    when(relocationConfig.isTaskTerminationOnBadNodeConditionEnabled()).thenReturn(true);
    when(relocationConfig.getDataStalenessThresholdMs()).thenReturn(8000L);

    // Staleness reported by the two data sources.
    NodeDataResolver nodeResolver = mock(NodeDataResolver.class);
    when(nodeResolver.getStalenessMs()).thenReturn(5L);
    JobDataReplicator replicator = mock(JobDataReplicator.class);
    when(replicator.getStalenessMs()).thenReturn(10L);

    ReadOnlyJobOperations jobOps = mock(ReadOnlyJobOperations.class);

    // Capture the id of every task the controller asks to kill.
    JobManagementClient managementClient = mock(JobManagementClient.class);
    Set<String> killedTaskIds = new HashSet<>();
    when(managementClient.killTask(anyString(), anyBoolean(), any())).thenAnswer(invocation -> {
        killedTaskIds.add(invocation.<String>getArgument(0));
        return Mono.empty();
    });

    DefaultNodeConditionController controller = new DefaultNodeConditionController(
            relocationConfig, nodeResolver, replicator, jobOps, managementClient, runtime
    );
    ExecutionContext firstIteration = ExecutionContext.newBuilder()
            .withIteration(ExecutionId.initial())
            .build();

    StepVerifier.create(controller.handleNodesWithBadCondition(firstIteration)).verifyComplete();

    // No tasks terminated
    assertThat(killedTaskIds).isEmpty();
}
use of com.netflix.titus.common.runtime.TitusRuntime in project titus-control-plane by Netflix.
the class DefaultDeschedulerService method deschedule.
/**
 * Computes the descheduling results for the current set of jobs and tasks.
 * <p>
 * The result combines three groups, in this order:
 * <ol>
 *     <li>immediate evictions and requested job/task migrations reported by the
 *     {@link TaskMigrationDescheduler};</li>
 *     <li>evictions for tasks returned by successive {@code nextBestMatch()} calls, for which
 *     a relocation plan could be resolved;</li>
 *     <li>results carrying a {@link DeschedulingFailure} for the remaining tasks that the
 *     allocation tracker reports as evacuated.</li>
 * </ol>
 *
 * @param plannedAheadTaskRelocationPlans relocation plans created earlier, keyed by task id
 * @return requested evictions followed by regular evictions, merged into a single list
 */
@Override
public List<DeschedulingResult> deschedule(Map<String, TaskRelocationPlan> plannedAheadTaskRelocationPlans) {
    // Index the current jobs and tasks by their ids.
    List<Pair<Job, List<Task>>> jobsWithTasks = jobOperations.getJobsAndTasks();
    Map<String, Job<?>> jobsById = jobsWithTasks.stream()
            .map(Pair::getLeft)
            .collect(Collectors.toMap(Job::getId, j -> j));
    Map<String, Task> tasksById = jobsWithTasks.stream()
            .flatMap(p -> p.getRight().stream())
            .collect(Collectors.toMap(Task::getId, t -> t));

    EvacuatedAgentsAllocationTracker allocationTracker =
            new EvacuatedAgentsAllocationTracker(nodeDataResolver.resolve(), tasksById);
    EvictionQuotaTracker quotaTracker = new EvictionQuotaTracker(evictionOperations, jobsById);
    TaskMigrationDescheduler migrationDescheduler = new TaskMigrationDescheduler(
            plannedAheadTaskRelocationPlans,
            allocationTracker,
            quotaTracker,
            evictionConfiguration,
            jobsById,
            tasksById,
            titusRuntime
    );

    // Explicitly requested evictions; immediate ones are collected first.
    Map<String, DeschedulingResult> immediateEvictions = migrationDescheduler.findAllImmediateEvictions();
    Map<String, DeschedulingResult> migrationEvictions = migrationDescheduler.findRequestedJobOrTaskMigrations();
    Map<String, DeschedulingResult> allRequestedEvictions = CollectionsExt.merge(immediateEvictions, migrationEvictions);

    // Regular evictions: keep asking the descheduler for the next best match until it has none.
    Map<String, DeschedulingResult> regularEvictions = new HashMap<>();
    Optional<Pair<TitusNode, List<Task>>> candidate = migrationDescheduler.nextBestMatch();
    while (candidate.isPresent()) {
        TitusNode matchedAgent = candidate.get().getLeft();
        for (Task matchedTask : candidate.get().getRight()) {
            if (allRequestedEvictions.containsKey(matchedTask.getId())) {
                continue;
            }
            Optional<TaskRelocationPlan> plan =
                    getRelocationPlanForTask(matchedAgent, matchedTask, plannedAheadTaskRelocationPlans);
            if (plan.isPresent()) {
                regularEvictions.put(
                        matchedTask.getId(),
                        DeschedulingResult.newBuilder()
                                .withTask(matchedTask)
                                .withAgentInstance(matchedAgent)
                                .withTaskRelocationPlan(plan.get())
                                .build()
                );
            }
        }
        candidate = migrationDescheduler.nextBestMatch();
    }

    // Find eviction which could not be scheduled now.
    for (Task task : tasksById.values()) {
        String taskId = task.getId();
        boolean alreadyCovered = allRequestedEvictions.containsKey(taskId) || regularEvictions.containsKey(taskId);
        if (alreadyCovered || !allocationTracker.isEvacuated(task)) {
            continue;
        }

        DeschedulingFailure failure = migrationDescheduler.getDeschedulingFailure(task);
        TaskRelocationPlan relocationPlan = plannedAheadTaskRelocationPlans.get(taskId);
        if (relocationPlan == null) {
            // No plan was prepared ahead of time for this task, so create a non-delayed one.
            relocationPlan = newNotDelayedRelocationPlan(task, false);
        }
        TitusNode removableAgent = allocationTracker.getRemovableAgent(task);
        regularEvictions.put(
                taskId,
                DeschedulingResult.newBuilder()
                        .withTask(task)
                        .withAgentInstance(removableAgent)
                        .withTaskRelocationPlan(relocationPlan)
                        .withFailure(failure)
                        .build()
        );
    }

    return CollectionsExt.merge(
            new ArrayList<>(allRequestedEvictions.values()),
            new ArrayList<>(regularEvictions.values())
    );
}
Aggregations