use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.
the class ServiceDifferenceResolver method findMissingRunningTasks.
/**
 * Walks every task of the reference job view and emits a Kube launch action for each one that has no
 * counterpart in the running job view, or that is still in the {@code Accepted} state without a pod.
 *
 * @param engine reconciliation engine handed over to the launch action
 * @param refJobView reference view; supplies the tasks to inspect and the owning job
 * @param runningJobView running view; queried by task id for the matching running task
 * @return launch actions for the tasks found to be missing; empty if there are none
 */
private List<ChangeAction> findMissingRunningTasks(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, ServiceJobView runningJobView) {
    List<ChangeAction> launchActions = new ArrayList<>();
    for (ServiceJobTask referenceTask : refJobView.getTasks()) {
        ServiceJobTask runningCounterpart = runningJobView.getTaskById(referenceTask.getId());

        // Nothing can be launched while the compute provider is not ready for scheduling.
        if (!runtime.getComputeProvider().isReadyForScheduling()) {
            continue;
        }

        // TODO This complexity exists due to the way Fenzo is initialized on bootstrap.
        // This code can be simplified once we move off Fenzo.
        // A running counterpart exists and the reference task is either past 'Accepted' or already has a pod.
        if (runningCounterpart != null
                && (referenceTask.getStatus().getState() != TaskState.Accepted || TaskStatus.hasPod(referenceTask))) {
            continue;
        }

        launchActions.add(BasicTaskActions.launchTaskInKube(
                configuration,
                runtime,
                engine,
                refJobView.getJob(),
                referenceTask,
                RECONCILER_CALLMETADATA.toBuilder().withCallReason("Launching task in Kube").build(),
                versionSupplier,
                titusRuntime
        ));
    }
    return launchActions;
}
use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.
the class V1SpecPodFactoryTest method relocationLabel.
/**
 * The relocation bin-pack label must be absent on a pod built for the generated service job, and set to
 * "SelfManaged" on a pod built for the same job with a self managed disruption budget policy.
 */
@Test
public void relocationLabel() {
    Job<ServiceJobExt> regularJob = JobGenerator.oneServiceJob();

    // Same job, but with the disruption budget policy swapped for the self managed one.
    Job<ServiceJobExt> selfManagedJob = regularJob.toBuilder()
            .withJobDescriptor(regularJob.getJobDescriptor().but(descriptor ->
                    descriptor.getDisruptionBudget().toBuilder()
                            .withDisruptionBudgetPolicy(SelfManagedDisruptionBudgetPolicy.newBuilder().build())
                            .build()
            ))
            .build();

    ServiceJobTask serviceTask = JobGenerator.oneServiceTask();
    when(podAffinityFactory.buildV1Affinity(any(), eq(serviceTask)))
            .thenReturn(Pair.of(new V1Affinity(), new HashMap<>()));

    V1Pod regularPod = podFactory.buildV1Pod(regularJob, serviceTask);
    assertThat(regularPod.getMetadata().getLabels())
            .doesNotContainKey(KubeConstants.POD_LABEL_RELOCATION_BINPACK);

    V1Pod podOfSelfManagedJob = podFactory.buildV1Pod(selfManagedJob, serviceTask);
    assertThat(podOfSelfManagedJob.getMetadata().getLabels())
            .containsEntry(KubeConstants.POD_LABEL_RELOCATION_BINPACK, "SelfManaged");
}
use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.
the class DefaultDeschedulerServiceTest method verifyRelocationPlan.
/**
 * Asks a freshly built {@link DefaultDeschedulerService} for the relocation plan of a single service task
 * placed on a node flagged as requiring relocation, and checks the plan's task id, relocation time,
 * decision time and reason message.
 *
 * @param relocationDelay relocation delay configured in the job's self managed disruption budget policy
 * @param reasonMessage reason message the resulting relocation plan is expected to carry
 */
private void verifyRelocationPlan(long relocationDelay, String reasonMessage) {
    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);
    DefaultDeschedulerService deschedulerService = new DefaultDeschedulerService(
            jobOperations,
            mock(ReadOnlyEvictionOperations.class),
            new KubernetesNodeDataResolver(configuration, TestDataFactory.mockFabric8IOConnector(), node -> true),
            () -> "foo|bar",
            titusRuntime
    );

    // Two-instance service job with a self managed policy using the requested relocation delay.
    Job<ServiceJobExt> serviceJob = JobGenerator.serviceJobs(
            oneTaskServiceJobDescriptor().but(
                    ofServiceSize(2),
                    withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))
            )
    ).getValue();
    ServiceJobTask serviceTask = JobGenerator.serviceTasks(serviceJob).getValue();
    when(jobOperations.getJob(serviceJob.getId())).thenReturn(Optional.of(serviceJob));

    TitusNode relocatableNode = TitusNode.newBuilder()
            .withId("node1")
            .withServerGroupId("asg1")
            .withRelocationRequired(true)
            .withBadCondition(false)
            .build();

    // Advance test clock
    long clockAdvancedMs = 5_000;
    TestClock testClock = (TestClock) titusRuntime.getClock();
    testClock.advanceTime(Duration.ofMillis(clockAdvancedMs));

    Optional<TaskRelocationPlan> planOptional = deschedulerService.getRelocationPlanForTask(relocatableNode, serviceTask, Collections.emptyMap());
    assertThat(planOptional).isPresent();

    TaskRelocationPlan plan = planOptional.get();
    assertThat(plan.getTaskId()).isEqualTo(serviceTask.getId());
    // relocation time is expected to be the decision clock time plus the relocation delay
    assertThat(plan.getRelocationTime()).isEqualTo(relocationDelay + clockAdvancedMs);
    assertThat(plan.getDecisionTime()).isEqualTo(clockAdvancedMs);
    assertThat(plan.getReasonMessage()).isEqualTo(reasonMessage);
}
use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.
the class GrpcJobManagementModelConverters method toGrpcTask.
/**
 * Converts a core model task into its GRPC representation.
 *
 * <p>The GRPC task context is a copy of the core task context extended with the original task id and the
 * resubmit counters, the "resubmit of" id when present, and the task index for batch tasks. Migration
 * details are set only for service tasks.
 *
 * @param coreTask core model task to convert
 * @param logStorageInfo used to resolve the log location of the task
 * @return GRPC task built from {@code coreTask}
 */
public static com.netflix.titus.grpc.protogen.Task toGrpcTask(Task coreTask, LogStorageInfo<Task> logStorageInfo) {
    // Work on a copy, so the context of the core task is left untouched.
    Map<String, String> grpcContext = new HashMap<>(coreTask.getTaskContext());
    grpcContext.put(TASK_ATTRIBUTES_TASK_ORIGINAL_ID, coreTask.getOriginalId());
    grpcContext.put(TASK_ATTRIBUTES_RESUBMIT_NUMBER, Integer.toString(coreTask.getResubmitNumber()));
    grpcContext.put(TASK_ATTRIBUTES_SYSTEM_RESUBMIT_NUMBER, Integer.toString(coreTask.getSystemResubmitNumber()));
    grpcContext.put(TASK_ATTRIBUTES_EVICTION_RESUBMIT_NUMBER, Integer.toString(coreTask.getEvictionResubmitNumber()));
    coreTask.getResubmitOf().ifPresent(previousTaskId -> grpcContext.put(TASK_ATTRIBUTES_TASK_RESUBMIT_OF, previousTaskId));
    if (coreTask instanceof BatchJobTask) {
        grpcContext.put(TASK_ATTRIBUTES_TASK_INDEX, Integer.toString(((BatchJobTask) coreTask).getIndex()));
    }

    com.netflix.titus.grpc.protogen.Task.Builder grpcTask = com.netflix.titus.grpc.protogen.Task.newBuilder();
    grpcTask.setId(coreTask.getId());
    grpcTask.setJobId(coreTask.getJobId());
    grpcTask.setStatus(toGrpcTaskStatus(coreTask.getStatus()));
    grpcTask.addAllStatusHistory(toGrpcTaskStatusHistory(coreTask.getStatusHistory()));
    grpcTask.putAllTaskContext(grpcContext);
    grpcTask.putAllAttributes(coreTask.getAttributes());
    grpcTask.setLogLocation(toGrpcLogLocation(coreTask, logStorageInfo));
    grpcTask.setVersion(toGrpcVersion(coreTask.getVersion()));

    // Only service tasks carry migration details.
    if (coreTask instanceof ServiceJobTask) {
        grpcTask.setMigrationDetails(toGrpcMigrationDetails(((ServiceJobTask) coreTask).getMigrationDetails()));
    }
    return grpcTask.build();
}
use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.
the class ServiceDifferenceResolver method applyStore.
/**
 * Compares the reference model of a service job with its store model and collects the actions that bring
 * the store in line with the reference.
 *
 * <p>For each task of the reference job:
 * <ul>
 * <li>if the reference and store task are equivalent and the task is {@code Finished}, the task is either
 * removed (job terminating, task scaled down, or enough tasks running) or a replacement is created
 * when a retry is due now;</li>
 * <li>if they are not equivalent, or the store has no entry for the task, the reference task is written
 * to the store.</li>
 * </ul>
 *
 * @param engine reconciliation engine passed to the store update actions
 * @param refJobView view of the reference job; supplies the job holder and its task holders
 * @param storeJob store model holder of the same job
 * @param allowedNewTasks budget of new tasks; decremented once per finished task that is eligible for retry,
 *                        whether the retry happens now or is delayed
 * @return actions to execute; empty when the store write interceptor's execution limits reject the job
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }

    List<ChangeAction> actions = new ArrayList<>();

    EntityHolder refJobHolder = refJobView.getJobHolder();
    Job<ServiceJobExt> refJob = refJobHolder.getEntity();

    if (!refJobHolder.getEntity().equals(storeJob.getEntity())) {
        actions.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }

    boolean isJobTerminating = refJob.getStatus().getState() == JobState.KillInitiated;
    for (EntityHolder referenceTaskHolder : refJobHolder.getChildren()) {
        ServiceJobTask refTask = referenceTaskHolder.getEntity();

        // The store model may have no entry for this task. Treat that as "out of sync" instead of calling
        // Optional.get() blindly, which would throw NoSuchElementException and abort the whole pass.
        // NOTE(review): confirm that writing the reference task is the desired recovery for a missing entry.
        Optional<EntityHolder> storeHolder = storeJob.findById(referenceTaskHolder.getId());
        boolean refAndStoreInSync = storeHolder.isPresent() && areEquivalent(storeHolder.get(), referenceTaskHolder);

        // Constant-first equals keeps the check safe if the status carries no reason code.
        boolean shouldRetry = !isJobTerminating
                && refTask.getStatus().getState() == TaskState.Finished
                && !TaskStatus.REASON_SCALED_DOWN.equals(refTask.getStatus().getReasonCode())
                && allowedNewTasks.get() > 0;

        if (refAndStoreInSync) {
            TaskState currentTaskState = refTask.getStatus().getState();
            if (currentTaskState == TaskState.Finished) {
                // Safe: refAndStoreInSync implies the store holder is present.
                ServiceJobTask storeTask = storeHolder.get().getEntity();
                if (isJobTerminating || isScaledDown(storeTask) || hasEnoughTasksRunning(refJobView)) {
                    actions.add(removeFinishedServiceTaskAction(jobStore, storeTask));
                } else if (shouldRetry && TaskRetryers.shouldRetryNow(referenceTaskHolder, clock)) {
                    createNewTaskAction(refJobView, Optional.of(referenceTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(actions::add);
                }
            }
        } else {
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            actions.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, refTask.getId(), callMetadata, titusRuntime)));
        }

        // Both current and delayed retries are counted
        if (shouldRetry) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return actions;
}
Aggregations