Search in sources :

Example 1 with ServiceJobTask

use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.

the class ServiceDifferenceResolver method findMissingRunningTasks.

/**
 * Builds launch actions for reference tasks that are not properly represented in the running model.
 * <p>
 * A reference task is scheduled for launch when the compute provider reports that it is ready for
 * scheduling and either the running view has no task with the same id, or the reference task is in
 * the {@code Accepted} state and {@code TaskStatus.hasPod} returns false for it.
 *
 * @param engine         reconciliation engine handed over to the launch action
 * @param refJobView     view of the reference model; its tasks are the ones examined
 * @param runningJobView view of the running model; used to look up the counterpart of each reference task
 * @return launch actions, one per reference task that needs launching (possibly empty, never null)
 */
private List<ChangeAction> findMissingRunningTasks(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, ServiceJobView runningJobView) {
    List<ChangeAction> launchActions = new ArrayList<>();
    for (ServiceJobTask candidate : refJobView.getTasks()) {
        ServiceJobTask runningCounterpart = runningJobView.getTaskById(candidate.getId());
        if (!runtime.getComputeProvider().isReadyForScheduling()) {
            continue;
        }
        // TODO This complexity exists due to the way Fenzo is initialized on bootstrap. This code can be simplified once we move off Fenzo.
        // Skip tasks that already have a running counterpart, unless they are still Accepted with no pod.
        if (runningCounterpart != null
                && (candidate.getStatus().getState() != TaskState.Accepted || TaskStatus.hasPod(candidate))) {
            continue;
        }
        launchActions.add(BasicTaskActions.launchTaskInKube(
                configuration,
                runtime,
                engine,
                refJobView.getJob(),
                candidate,
                RECONCILER_CALLMETADATA.toBuilder().withCallReason("Launching task in Kube").build(),
                versionSupplier,
                titusRuntime
        ));
    }
    return launchActions;
}
Also used : TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)

Example 2 with ServiceJobTask

use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.

the class V1SpecPodFactoryTest method relocationLabel.

/**
 * Verifies that the relocation bin-pack label is absent from a pod built for the default generated
 * service job, and is set to "SelfManaged" when the job carries a self managed disruption budget policy.
 */
@Test
public void relocationLabel() {
    Job<ServiceJobExt> defaultJob = JobGenerator.oneServiceJob();
    // Same job, but with the disruption budget policy swapped for the self managed one.
    Job<ServiceJobExt> jobWithSelfManagedPolicy = defaultJob.toBuilder()
            .withJobDescriptor(defaultJob.getJobDescriptor().but(jd ->
                    jd.getDisruptionBudget().toBuilder()
                            .withDisruptionBudgetPolicy(SelfManagedDisruptionBudgetPolicy.newBuilder().build())))
            .build();
    ServiceJobTask serviceTask = JobGenerator.oneServiceTask();
    when(podAffinityFactory.buildV1Affinity(any(), eq(serviceTask)))
            .thenReturn(Pair.of(new V1Affinity(), new HashMap<>()));

    // Default job: the label must not be present.
    V1Pod defaultPod = podFactory.buildV1Pod(defaultJob, serviceTask);
    assertThat(defaultPod.getMetadata().getLabels())
            .doesNotContainKey(KubeConstants.POD_LABEL_RELOCATION_BINPACK);

    // Self managed job: the label must be present with the expected value.
    V1Pod podForSelfManagedJob = podFactory.buildV1Pod(jobWithSelfManagedPolicy, serviceTask);
    assertThat(podForSelfManagedJob.getMetadata().getLabels())
            .containsEntry(KubeConstants.POD_LABEL_RELOCATION_BINPACK, "SelfManaged");
}
Also used : V1Affinity(io.kubernetes.client.openapi.models.V1Affinity) HashMap(java.util.HashMap) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) V1Pod(io.kubernetes.client.openapi.models.V1Pod) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) Test(org.junit.Test)

Example 3 with ServiceJobTask

use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.

the class DefaultDeschedulerServiceTest method verifyRelocationPlan.

/**
 * Builds a descheduler service around mocked collaborators, asks it for the relocation plan of a
 * single service task placed on a node that requires relocation, and checks the resulting plan.
 *
 * @param relocationDelay delay passed to the self managed disruption budget policy of the test job;
 *                        the expected relocation time is this value plus the advanced clock time
 * @param reasonMessage   reason message the produced relocation plan is expected to carry
 */
private void verifyRelocationPlan(long relocationDelay, String reasonMessage) {
    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);
    ReadOnlyEvictionOperations evictionOperations = mock(ReadOnlyEvictionOperations.class);
    KubernetesNodeDataResolver nodeDataResolver = new KubernetesNodeDataResolver(
            configuration, TestDataFactory.mockFabric8IOConnector(), node -> true);
    DefaultDeschedulerService deschedulerService = new DefaultDeschedulerService(
            jobOperations, evictionOperations, nodeDataResolver, () -> "foo|bar", titusRuntime);

    // Two-instance service job with a self managed disruption budget using the given delay.
    Job<ServiceJobExt> serviceJob = JobGenerator.serviceJobs(
            oneTaskServiceJobDescriptor().but(
                    ofServiceSize(2),
                    withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))
            )
    ).getValue();
    ServiceJobTask serviceTask = JobGenerator.serviceTasks(serviceJob).getValue();
    when(jobOperations.getJob(serviceJob.getId())).thenReturn(Optional.of(serviceJob));

    TitusNode relocatingNode = TitusNode.newBuilder()
            .withId("node1")
            .withServerGroupId("asg1")
            .withRelocationRequired(true)
            .withBadCondition(false)
            .build();

    // Advance test clock
    long clockAdvancedMs = 5_000;
    TestClock testClock = (TestClock) titusRuntime.getClock();
    testClock.advanceTime(Duration.ofMillis(clockAdvancedMs));

    Optional<TaskRelocationPlan> relocationPlanForTask =
            deschedulerService.getRelocationPlanForTask(relocatingNode, serviceTask, Collections.emptyMap());
    assertThat(relocationPlanForTask).isPresent();

    TaskRelocationPlan plan = relocationPlanForTask.get();
    assertThat(plan.getTaskId()).isEqualTo(serviceTask.getId());
    // relocation time is expected to be decision clock time + relocationDelay
    assertThat(plan.getRelocationTime()).isEqualTo(relocationDelay + clockAdvancedMs);
    assertThat(plan.getDecisionTime()).isEqualTo(clockAdvancedMs);
    assertThat(plan.getReasonMessage()).isEqualTo(reasonMessage);
}
Also used : TestDataFactory(com.netflix.titus.supplementary.relocation.TestDataFactory) Archaius2Ext(com.netflix.titus.common.util.archaius2.Archaius2Ext) DisruptionBudgetGenerator.unlimitedRate(com.netflix.titus.testkit.model.eviction.DisruptionBudgetGenerator.unlimitedRate) RelocationAttributes(com.netflix.titus.runtime.RelocationAttributes) Task(com.netflix.titus.api.jobmanager.model.job.Task) DisruptionBudgetGenerator.budget(com.netflix.titus.testkit.model.eviction.DisruptionBudgetGenerator.budget) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KubernetesNodeDataResolver(com.netflix.titus.supplementary.relocation.connector.KubernetesNodeDataResolver) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) TitusRuntimes(com.netflix.titus.common.runtime.TitusRuntimes) RelocationConfiguration(com.netflix.titus.supplementary.relocation.RelocationConfiguration) Duration(java.time.Duration) RelocationConnectorStubs(com.netflix.titus.supplementary.relocation.RelocationConnectorStubs) JobFunctions.ofServiceSize(com.netflix.titus.api.jobmanager.model.job.JobFunctions.ofServiceSize) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) TaskRelocationReason(com.netflix.titus.api.relocation.model.TaskRelocationPlan.TaskRelocationReason) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) MutableDataGenerator(com.netflix.titus.common.data.generator.MutableDataGenerator) JobDescriptorGenerator.oneTaskServiceJobDescriptor(com.netflix.titus.testkit.model.job.JobDescriptorGenerator.oneTaskServiceJobDescriptor) DeschedulingResult(com.netflix.titus.supplementary.relocation.model.DeschedulingResult) DisruptionBudgetGenerator.selfManagedPolicy(com.netflix.titus.testkit.model.eviction.DisruptionBudgetGenerator.selfManagedPolicy) Job(com.netflix.titus.api.jobmanager.model.job.Job) JobFunctions.withJobId(com.netflix.titus.api.jobmanager.model.job.JobFunctions.withJobId) 
ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) JobFunctions.withDisruptionBudget(com.netflix.titus.api.jobmanager.model.job.JobFunctions.withDisruptionBudget) JobGenerator(com.netflix.titus.testkit.model.job.JobGenerator) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) List(java.util.List) ReadOnlyEvictionOperations(com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations) Optional(java.util.Optional) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) TestClock(com.netflix.titus.common.util.time.TestClock) Collections(java.util.Collections) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) Mockito.mock(org.mockito.Mockito.mock) ReadOnlyJobOperations(com.netflix.titus.api.jobmanager.service.ReadOnlyJobOperations) KubernetesNodeDataResolver(com.netflix.titus.supplementary.relocation.connector.KubernetesNodeDataResolver) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) TestClock(com.netflix.titus.common.util.time.TestClock) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) ReadOnlyEvictionOperations(com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations) TitusNode(com.netflix.titus.supplementary.relocation.connector.TitusNode) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)

Example 4 with ServiceJobTask

use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.

the class GrpcJobManagementModelConverters method toGrpcTask.

/**
 * Converts a core task into its gRPC representation.
 * <p>
 * The gRPC task context is a copy of the core task context enriched with the original task id, the
 * resubmit counters, the id of the task this one is a resubmit of (when present) and, for batch
 * tasks, the task index. Service tasks additionally get their migration details set.
 *
 * @param coreTask       core model task to convert
 * @param logStorageInfo passed to {@code toGrpcLogLocation} to build the task's log location
 * @return the built gRPC task
 */
public static com.netflix.titus.grpc.protogen.Task toGrpcTask(Task coreTask, LogStorageInfo<Task> logStorageInfo) {
    // Work on a copy so the core task's own context map is never modified.
    Map<String, String> enrichedContext = new HashMap<>(coreTask.getTaskContext());
    enrichedContext.put(TASK_ATTRIBUTES_TASK_ORIGINAL_ID, coreTask.getOriginalId());
    enrichedContext.put(TASK_ATTRIBUTES_RESUBMIT_NUMBER, Integer.toString(coreTask.getResubmitNumber()));
    enrichedContext.put(TASK_ATTRIBUTES_SYSTEM_RESUBMIT_NUMBER, Integer.toString(coreTask.getSystemResubmitNumber()));
    enrichedContext.put(TASK_ATTRIBUTES_EVICTION_RESUBMIT_NUMBER, Integer.toString(coreTask.getEvictionResubmitNumber()));
    coreTask.getResubmitOf().ifPresent(
            previousTaskId -> enrichedContext.put(TASK_ATTRIBUTES_TASK_RESUBMIT_OF, previousTaskId)
    );
    if (coreTask instanceof BatchJobTask) {
        int batchIndex = ((BatchJobTask) coreTask).getIndex();
        enrichedContext.put(TASK_ATTRIBUTES_TASK_INDEX, Integer.toString(batchIndex));
    }

    com.netflix.titus.grpc.protogen.Task.Builder grpcTask = com.netflix.titus.grpc.protogen.Task.newBuilder()
            .setId(coreTask.getId())
            .setJobId(coreTask.getJobId())
            .setStatus(toGrpcTaskStatus(coreTask.getStatus()))
            .addAllStatusHistory(toGrpcTaskStatusHistory(coreTask.getStatusHistory()))
            .putAllTaskContext(enrichedContext)
            .putAllAttributes(coreTask.getAttributes())
            .setLogLocation(toGrpcLogLocation(coreTask, logStorageInfo))
            .setVersion(toGrpcVersion(coreTask.getVersion()));

    // Migration details exist only on service tasks.
    if (coreTask instanceof ServiceJobTask) {
        grpcTask.setMigrationDetails(toGrpcMigrationDetails(((ServiceJobTask) coreTask).getMigrationDetails()));
    }
    return grpcTask.build();
}
Also used : BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) Task(com.netflix.titus.api.jobmanager.model.job.Task) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) HashMap(java.util.HashMap) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) ByteString(com.google.protobuf.ByteString) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)

Example 5 with ServiceJobTask

use of com.netflix.titus.api.jobmanager.model.job.ServiceJobTask in project titus-control-plane by Netflix.

the class ServiceDifferenceResolver method applyStore.

/**
 * Computes the change actions that reconcile the store model with the reference model of a service job.
 * <p>
 * For the job itself, a store update is emitted when the reference and store entities are not equal.
 * For each reference task:
 * <ul>
 * <li>if the store holds an equivalent task and the task is {@code Finished}, the task is either
 * removed (job terminating, task scaled down, or enough tasks running) or replaced by a new task
 * (when a retry is allowed and due now);</li>
 * <li>if the store has no counterpart for the task, or the counterpart is not equivalent, the
 * reference task is written to the store.</li>
 * </ul>
 *
 * @param engine          reconciliation engine passed to the store actions
 * @param refJobView      view of the reference model
 * @param storeJob        root holder of the store model
 * @param allowedNewTasks remaining budget of new tasks; decremented once for every task that qualifies
 *                        for a retry, whether the retry is issued now or delayed
 * @return actions to execute; empty when {@code storeWriteRetryInterceptor.executionLimits(storeJob)}
 * returns false
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }
    List<ChangeAction> actions = new ArrayList<>();
    EntityHolder refJobHolder = refJobView.getJobHolder();
    Job<ServiceJobExt> refJob = refJobHolder.getEntity();
    if (!refJob.equals(storeJob.getEntity())) {
        actions.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }
    boolean isJobTerminating = refJob.getStatus().getState() == JobState.KillInitiated;
    for (EntityHolder referenceTaskHolder : refJobHolder.getChildren()) {
        ServiceJobTask refTask = referenceTaskHolder.getEntity();
        Optional<EntityHolder> storeHolder = storeJob.findById(referenceTaskHolder.getId());
        // A task missing from the store model is by definition out of sync. Checking presence first
        // avoids a NoSuchElementException that would abort the whole reconciliation pass.
        boolean refAndStoreInSync = storeHolder.isPresent() && areEquivalent(storeHolder.get(), referenceTaskHolder);
        boolean shouldRetry = !isJobTerminating && refTask.getStatus().getState() == TaskState.Finished && !refTask.getStatus().getReasonCode().equals(TaskStatus.REASON_SCALED_DOWN) && allowedNewTasks.get() > 0;
        if (refAndStoreInSync) {
            TaskState currentTaskState = refTask.getStatus().getState();
            if (currentTaskState == TaskState.Finished) {
                // Safe: refAndStoreInSync implies the store holder is present.
                ServiceJobTask storeTask = storeHolder.get().getEntity();
                if (isJobTerminating || isScaledDown(storeTask) || hasEnoughTasksRunning(refJobView)) {
                    actions.add(removeFinishedServiceTaskAction(jobStore, storeTask));
                } else if (shouldRetry && TaskRetryers.shouldRetryNow(referenceTaskHolder, clock)) {
                    createNewTaskAction(refJobView, Optional.of(referenceTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(actions::add);
                }
            }
        } else {
            // Missing or stale store entry: persist the reference task state.
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            actions.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, refTask.getId(), callMetadata, titusRuntime)));
        }
        // Both current and delayed retries are counted
        if (shouldRetry) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return actions;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState)

Aggregations

ServiceJobTask (com.netflix.titus.api.jobmanager.model.job.ServiceJobTask)12 ServiceJobExt (com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt)5 Test (org.junit.Test)5 Task (com.netflix.titus.api.jobmanager.model.job.Task)4 V1Affinity (io.kubernetes.client.openapi.models.V1Affinity)3 Job (com.netflix.titus.api.jobmanager.model.job.Job)2 ChangeAction (com.netflix.titus.common.framework.reconciler.ChangeAction)2 TitusRuntimes (com.netflix.titus.common.runtime.TitusRuntimes)2 Retryer (com.netflix.titus.common.util.retry.Retryer)2 TitusChangeAction (com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction)2 JobGenerator (com.netflix.titus.testkit.model.job.JobGenerator)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 ByteString (com.google.protobuf.ByteString)1 FeatureActivationConfiguration (com.netflix.titus.api.FeatureActivationConfiguration)1 ReadOnlyEvictionOperations (com.netflix.titus.api.eviction.service.ReadOnlyEvictionOperations)1 JobAttributes (com.netflix.titus.api.jobmanager.JobAttributes)1 JobConstraints (com.netflix.titus.api.jobmanager.JobConstraints)1 BatchJobTask (com.netflix.titus.api.jobmanager.model.job.BatchJobTask)1 Container (com.netflix.titus.api.jobmanager.model.job.Container)1