use of com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt in project titus-control-plane by Netflix.
the class DifferenceResolverUtils method shouldRetry.
/**
 * Tells whether the given task of the given job should be retried.
 *
 * <p>A retry is considered only when the task is in the {@code Finished} state and the job is in
 * the {@code Accepted} state, and the retry limit has not been reached. Non-batch jobs are then
 * always retried. For batch jobs, a task whose {@code KillInitiated} status carries the
 * runtime-limit-exceeded reason is retried only if the batch extension enables
 * {@code retryOnRuntimeLimit}; otherwise the task is retried unless it finished with the normal
 * reason code.
 *
 * @param job  the job owning the task
 * @param task the task to evaluate
 * @return {@code true} if the task should be retried, {@code false} otherwise
 */
public static boolean shouldRetry(Job<?> job, Task task) {
    TaskStatus finalStatus = task.getStatus();

    // Only a finished task of a job that is still accepted is a retry candidate.
    boolean retryCandidate = finalStatus.getState() == TaskState.Finished
            && job.getStatus().getState() == JobState.Accepted;
    if (!retryCandidate) {
        return false;
    }
    if (hasReachedRetryLimit(job, task)) {
        return false;
    }
    if (!isBatch(job)) {
        return true;
    }

    // Batch job: a kill caused by the runtime limit is retried only when the job opts in.
    String killReasonCode = JobFunctions.findTaskStatus(task, TaskState.KillInitiated)
            .map(ExecutableStatus::getReasonCode)
            .orElse("N/A");
    if (TaskStatus.REASON_RUNTIME_LIMIT_EXCEEDED.equals(killReasonCode)
            && !((BatchJobExt) job.getJobDescriptor().getExtensions()).isRetryOnRuntimeLimit()) {
        return false;
    }

    boolean finishedNormally = TaskStatus.REASON_NORMAL.equals(finalStatus.getReasonCode());
    return !finishedNormally;
}
use of com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt in project titus-control-plane by Netflix.
the class JobCriteriaQueryTest method testSearchByJobState.
/**
 * Schedules two batch jobs sharing one application name, leaves the first as launched and kills
 * the second, then checks that job and task queries filtered by {@code jobState} each return
 * exactly the matching job / task.
 */
@Test(timeout = 30_000)
public void testSearchByJobState() throws Exception {
    JobDescriptor<BatchJobExt> descriptor = batchJobDescriptors().getValue().toBuilder()
            .withApplicationName("testSearchByJobState")
            .build();

    // First job is only launched; second job is launched and then killed.
    String launchedJobId = jobsScenarioBuilder
            .scheduleAndReturnJob(descriptor, scenario -> scenario.template(ScenarioTemplates.launchJob()))
            .getId();
    String killedJobId = jobsScenarioBuilder
            .scheduleAndReturnJob(descriptor, scenario -> scenario
                    .template(ScenarioTemplates.launchJob())
                    .killJob()
                    .expectJobUpdateEvent(
                            job -> job.getStatus().getState() == JobState.KillInitiated,
                            "Expected state: " + JobState.KillInitiated))
            .getId();

    String launchedTaskId = jobsScenarioBuilder.takeJob(launchedJobId).getTaskByIndex(0).getTask().getId();
    String killedTaskId = jobsScenarioBuilder.takeJob(killedJobId).getTaskByIndex(0).getTask().getId();

    // Indexes are recomputed after events are sent, so querying immediately could hit a stale index.
    Thread.sleep(10);

    // Both builders are reused below; each query overwrites the "jobState" criterion.
    JobQuery.Builder jobQuery = JobQuery.newBuilder()
            .putFilteringCriteria("applicationName", "testSearchByJobState")
            .setPage(PAGE);
    TaskQuery.Builder taskQuery = TaskQuery.newBuilder()
            .putFilteringCriteria("applicationName", "testSearchByJobState")
            .setPage(PAGE);

    // Jobs (Accepted)
    JobQueryResult acceptedJobs = client.findJobs(
            jobQuery.putFilteringCriteria("jobState", "Accepted").build());
    assertThat(acceptedJobs.getItemsList()).hasSize(1);
    assertThat(acceptedJobs.getItems(0).getId()).isEqualTo(launchedJobId);

    // Jobs (KillInitiated)
    JobQueryResult killInitiatedJobs = client.findJobs(
            jobQuery.putFilteringCriteria("jobState", "KillInitiated").setPage(PAGE).build());
    assertThat(killInitiatedJobs.getItemsList()).hasSize(1);
    assertThat(killInitiatedJobs.getItems(0).getId()).isEqualTo(killedJobId);

    // Tasks (Accepted)
    TaskQueryResult acceptedTasks = client.findTasks(
            taskQuery.putFilteringCriteria("jobState", "Accepted").setPage(PAGE).build());
    assertThat(acceptedTasks.getItemsList()).hasSize(1);
    assertThat(acceptedTasks.getItems(0).getId()).isEqualTo(launchedTaskId);

    // Tasks (KillInitiated)
    TaskQueryResult killInitiatedTasks = client.findTasks(
            taskQuery.putFilteringCriteria("jobState", "KillInitiated").setPage(PAGE).build());
    assertThat(killInitiatedTasks.getItemsList()).hasSize(1);
    assertThat(killInitiatedTasks.getItems(0).getId()).isEqualTo(killedTaskId);
}
use of com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt in project titus-control-plane by Netflix.
the class TitusQuotasManagerTest method isJobExemptFromSystemDisruptionWindow.
/**
 * Checks the system disruption window exemption for a batch job with application name
 * {@code app1Test}: it is expected to be exempt when the configured exemption value is
 * {@code "app1.*"} and not exempt when the value is {@code "app2.*"}.
 */
@Test
public void isJobExemptFromSystemDisruptionWindow() {
    Job<BatchJobExt> batchJob = JobGenerator.oneBatchJob().but(withApplicationName("app1Test"));

    // Exemption value "app1.*": the job is expected to be exempt.
    EvictionConfiguration app1Config = mock(EvictionConfiguration.class);
    when(app1Config.getAppsExemptFromSystemDisruptionWindow()).thenReturn("app1.*");
    TitusQuotasManager app1Manager = new TitusQuotasManager(null, null, null, null, app1Config, null);
    assertThat(app1Manager.isJobExemptFromSystemDisruptionWindow(batchJob)).isTrue();

    // Exemption value "app2.*": the same job is expected not to be exempt.
    EvictionConfiguration app2Config = mock(EvictionConfiguration.class);
    when(app2Config.getAppsExemptFromSystemDisruptionWindow()).thenReturn("app2.*");
    TitusQuotasManager app2Manager = new TitusQuotasManager(null, null, null, null, app2Config, null);
    assertThat(app2Manager.isJobExemptFromSystemDisruptionWindow(batchJob)).isFalse();
}
use of com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt in project titus-control-plane by Netflix.
the class TitusQuotasManagerTest method tryConsumeSystemAndJobQuota.
/**
 * Exercises {@code tryConsumeSystemAndJobQuota} with mocked system and job quota controllers for
 * a job whose application name ({@code app1Test}) is covered by the configured exemption value
 * {@code "app1.*"}. Four combinations are checked:
 * <ol>
 *   <li>system approves, job rejects: rejected with the job's reason;</li>
 *   <li>system approves, job approves: approved;</li>
 *   <li>system rejects with quota-limit-exceeded, job approves: rejected with that reason;</li>
 *   <li>system rejects with outside-system-time-window, job approves: approved.</li>
 * </ol>
 */
@Test
public void tryConsumeSystemAndJobQuota() {
    String taskId = "job1Task1";
    String jobRejectionReason = "Job does not allow any more terminations";
    Job<BatchJobExt> batchJob = JobGenerator.oneBatchJob().but(withApplicationName("app1Test"));

    EvictionConfiguration configuration = mock(EvictionConfiguration.class);
    when(configuration.getAppsExemptFromSystemDisruptionWindow()).thenReturn("app1.*");

    SystemQuotaController systemQuota = mock(SystemQuotaController.class);
    when(systemQuota.consume(taskId)).thenReturn(ConsumptionResult.approved());

    JobQuotaController rejectingJobQuota = mock(JobQuotaController.class);
    when(rejectingJobQuota.consume(taskId)).thenReturn(ConsumptionResult.rejected(jobRejectionReason));

    TitusQuotasManager manager = new TitusQuotasManager(null, null, null, systemQuota, configuration, null);

    // 1. System quota approves, job quota rejects.
    ConsumptionResult jobRejected = manager.tryConsumeSystemAndJobQuota(rejectingJobQuota, batchJob, taskId);
    assertThat(jobRejected.isApproved()).isFalse();
    assertThat(jobRejected.getRejectionReason()).isPresent();
    assertThat(jobRejected.getRejectionReason().get()).isEqualTo(jobRejectionReason);

    // 2. Both quotas approve.
    JobQuotaController approvingJobQuota = mock(JobQuotaController.class);
    when(approvingJobQuota.consume(taskId)).thenReturn(ConsumptionResult.approved());
    ConsumptionResult bothApproved = manager.tryConsumeSystemAndJobQuota(approvingJobQuota, batchJob, taskId);
    assertThat(bothApproved.isApproved()).isTrue();

    // 3. System quota rejects because the quota limit is exceeded.
    String limitExceededReason = SystemQuotaConsumptionResults.QUOTA_LIMIT_EXCEEDED.getRejectionReason().get();
    when(systemQuota.consume(taskId)).thenReturn(ConsumptionResult.rejected(limitExceededReason));
    ConsumptionResult limitExceeded = manager.tryConsumeSystemAndJobQuota(approvingJobQuota, batchJob, taskId);
    assertThat(limitExceeded.isApproved()).isFalse();
    assertThat(limitExceeded.getRejectionReason()).isPresent();
    assertThat(limitExceeded.getRejectionReason().get()).isEqualTo(limitExceededReason);

    // 4. System quota rejects as outside the system time window; the result is still expected to be approved.
    String outsideWindowReason = SystemQuotaConsumptionResults.OUTSIDE_SYSTEM_TIME_WINDOW.getRejectionReason().get();
    when(systemQuota.consume(taskId)).thenReturn(ConsumptionResult.rejected(outsideWindowReason));
    ConsumptionResult outsideWindow = manager.tryConsumeSystemAndJobQuota(approvingJobQuota, batchJob, taskId);
    assertThat(outsideWindow.isApproved()).isTrue();
}
use of com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt in project titus-control-plane by Netflix.
the class JobQuotaControllerTest method testMergeTaskRelocationLimitController.
/**
 * Builds quota controllers for a batch job with a per-task relocation limit of 1, consumes that
 * quota for one task, then changes the job's policy to a limit of 3 and merges the controllers.
 * The merged controller is expected to approve evictions of the first and second replacements of
 * that task and to reject the eviction of the third.
 */
@Test
public void testMergeTaskRelocationLimitController() {
    // First version: per-task relocation limit of 1
    Job<BatchJobExt> initialJob = newBatchJob(
            10, budget(perTaskRelocationLimitPolicy(1), unlimitedRate(), Collections.emptyList()));
    scheduleJob(initialJob, 10);

    List<QuotaController<Job<?>>> initialControllers = buildQuotaControllers(
            initialJob, jobOperations, SelfJobDisruptionBudgetResolver.getInstance(), titusRuntime);
    TaskRelocationLimitController initialController = (TaskRelocationLimitController) initialControllers.get(0);

    Task originalTask = jobOperations.getTasks(initialJob.getId()).get(0);

    // The first consume is approved; after the eviction the same task id is rejected.
    assertThat(initialController.consume(originalTask.getId()).isApproved()).isTrue();
    jobComponentStub.killTask(originalTask, false, false, V3JobOperations.Trigger.Eviction);
    assertThat(initialController.consume(originalTask.getId()).isApproved()).isFalse();

    Task firstReplacement = jobComponentStub.getJobOperations().getTasks().stream()
            .filter(candidate -> candidate.getOriginalId().equals(originalTask.getId()))
            .findFirst()
            .get();
    jobComponentStub.moveTaskToState(firstReplacement, TaskState.Started);

    // Change job descriptor (limit raised to 3) and consume some quota
    Job<BatchJobExt> changedJob = jobComponentStub.changeJob(
            exceptPolicy(initialJob, perTaskRelocationLimitPolicy(3)));
    List<QuotaController<Job<?>>> mergedControllers = mergeQuotaControllers(
            initialControllers, changedJob, jobOperations, SelfJobDisruptionBudgetResolver.getInstance(), titusRuntime);
    TaskRelocationLimitController mergedController = (TaskRelocationLimitController) mergedControllers.get(0);

    // Evict replacement 1
    assertThat(mergedController.consume(firstReplacement.getId()).isApproved()).isTrue();
    jobComponentStub.killTask(firstReplacement, false, false, V3JobOperations.Trigger.Eviction);
    Task secondReplacement = jobComponentStub.getJobOperations().getTasks().stream()
            .filter(candidate -> candidate.getOriginalId().equals(originalTask.getId()))
            .findFirst()
            .get();
    jobComponentStub.moveTaskToState(secondReplacement, TaskState.Started);

    // Evict replacement 2
    assertThat(mergedController.consume(secondReplacement.getId()).isApproved()).isTrue();
    jobComponentStub.killTask(secondReplacement, false, false, V3JobOperations.Trigger.Eviction);
    Task thirdReplacement = jobComponentStub.getJobOperations().getTasks().stream()
            .filter(candidate -> candidate.getOriginalId().equals(originalTask.getId()))
            .findFirst()
            .get();
    jobComponentStub.moveTaskToState(thirdReplacement, TaskState.Started);

    // Replacement 3 is expected to be rejected.
    assertThat(mergedController.consume(thirdReplacement.getId()).isApproved()).isFalse();
}
Aggregations