Search in sources:

Example 21 with EntityHolder

Use of com.netflix.titus.common.framework.reconciler.EntityHolder in the project titus-control-plane by Netflix.

From the class BasicTaskActions, method updateTaskInRunningModel.

/**
 * Builds a change action that applies {@code changeFunction} to the task currently held by the reconciliation
 * engine and publishes the outcome to both the reference and the running model.
 * <p>
 * No model updates are emitted when the task holder cannot be resolved, or when {@code changeFunction} yields an
 * empty result. Otherwise the changed task is stamped with its next version. If the resulting task is in the
 * {@link TaskState#Finished} state, the reference model update additionally carries retry delay information on
 * the task and its entity holder (see {@link TaskRetryers}); the running model always gets a plain task update.
 */
public static TitusChangeAction updateTaskInRunningModel(String taskId,
                                                         Trigger trigger,
                                                         JobManagerConfiguration configuration,
                                                         ReconciliationEngine<JobManagerReconcilerEvent> engine,
                                                         Function<Task, Optional<Task>> changeFunction,
                                                         String reason,
                                                         VersionSupplier versionSupplier,
                                                         TitusRuntime titusRuntime,
                                                         CallMetadata callMetadata) {
    return TitusChangeAction.newAction("updateTaskInRunningModel")
            .id(taskId)
            .trigger(trigger)
            .summary(reason)
            .callMetadata(callMetadata)
            .applyModelUpdates(self -> {
                Optional<EntityHolder> maybeHolder = JobEntityHolders.expectTaskHolder(engine, taskId, titusRuntime);
                if (!maybeHolder.isPresent()) {
                    return Collections.emptyList();
                }
                EntityHolder currentHolder = maybeHolder.get();
                Task currentTask = currentHolder.getEntity();

                Optional<Task> changedTask = changeFunction.apply(currentTask);
                if (!changedTask.isPresent()) {
                    // The change function decided there is nothing to update.
                    return Collections.emptyList();
                }
                Task versionedTask = VersionSuppliers.nextVersion(changedTask.get(), versionSupplier);

                // Only the reference model gets the retry attributes, so the two models are updated separately.
                boolean finished = versionedTask.getStatus().getState() == TaskState.Finished;
                TitusModelAction referenceUpdate = finished
                        ? attachRetryer(self, currentHolder, versionedTask, callMetadata, configuration, titusRuntime)
                        : TitusModelAction.newModelUpdate(self).taskUpdate(versionedTask);

                List<ModelActionHolder> updates = new ArrayList<>();
                updates.add(ModelActionHolder.reference(referenceUpdate));
                updates.add(ModelActionHolder.running(TitusModelAction.newModelUpdate(self).taskUpdate(versionedTask)));
                return updates;
            });
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder)

Example 22 with EntityHolder

Use of com.netflix.titus.common.framework.reconciler.EntityHolder in the project titus-control-plane by Netflix.

From the class BasicTaskActions, method attachRetryer.

/**
 * Creates a model action that records the current retry delay on a task.
 * <p>
 * The delay is obtained from {@link TaskRetryers#getCurrentRetryerDelayMs} and stored twice: as a human readable
 * string in the task context (under {@link TaskAttributes#TASK_ATTRIBUTES_RETRY_DELAY}), and as a millisecond
 * value tag on the task entity holder (under {@link TaskRetryers#ATTR_TASK_RETRY_DELAY_MS}).
 */
private static TitusModelAction attachRetryer(TitusChangeAction.Builder self,
                                              EntityHolder taskHolder,
                                              Task updatedTask,
                                              CallMetadata callMetadata,
                                              JobManagerConfiguration configuration,
                                              TitusRuntime titusRuntime) {
    long delayMs = TaskRetryers.getCurrentRetryerDelayMs(
            taskHolder,
            configuration.getMinRetryIntervalMs(),
            configuration.getTaskRetryerResetTimeMs(),
            titusRuntime.getClock()
    );
    String delayText = DateTimeExt.toTimeUnitString(delayMs);

    Task taskWithDelay = updatedTask.toBuilder()
            .addToTaskContext(TaskAttributes.TASK_ATTRIBUTES_RETRY_DELAY, delayText)
            .build();
    EntityHolder holderWithTask = taskHolder.setEntity(taskWithDelay);
    EntityHolder holderWithDelay = holderWithTask.addTag(TaskRetryers.ATTR_TASK_RETRY_DELAY_MS, delayMs);

    return TitusModelAction.newModelUpdate(self)
            .summary("Setting retry delay on task in Finished state: %s", delayText)
            .callMetadata(callMetadata)
            .addTaskHolder(holderWithDelay);
}
Also used : EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 23 with EntityHolder

Use of com.netflix.titus.common.framework.reconciler.EntityHolder in the project titus-control-plane by Netflix.

From the class BasicTaskActions, method launchTaskInKube.

/**
 * Builds a change action that asks the compute provider to launch the given task (create its pod).
 * <p>
 * On success the task is moved to the {@link TaskState#Accepted} state with the pod-created reason code, marked
 * with the {@link TaskAttributes#TASK_ATTRIBUTES_POD_CREATED} context attribute, and written to both the reference
 * and the running model. If the launch fails, the task is instead moved to the {@link TaskState#Finished} state
 * with a reason code resolved from the error, and retry delay information is attached to the reference model
 * update. No model updates are emitted when the task holder cannot be resolved.
 */
public static TitusChangeAction launchTaskInKube(JobManagerConfiguration configuration,
                                                 JobServiceRuntime runtime,
                                                 ReconciliationEngine<JobManagerReconcilerEvent> engine,
                                                 Job<?> job,
                                                 Task task,
                                                 CallMetadata callMetadata,
                                                 VersionSupplier versionSupplier,
                                                 TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("launchTaskInKube")
            .task(task)
            .trigger(V3JobOperations.Trigger.Reconciler)
            .summary("Adding task to Kube")
            .callMetadata(callMetadata)
            .changeWithModelUpdates(self -> {
                Optional<EntityHolder> maybeHolder = JobEntityHolders.expectTaskHolder(engine, task.getId(), titusRuntime);
                if (!maybeHolder.isPresent()) {
                    // This should never happen.
                    return Observable.just(Collections.emptyList());
                }
                EntityHolder taskHolder = maybeHolder.get();

                return ReactorExt.toCompletable(runtime.getComputeProvider().launchTask(job, task).then())
                        .andThen(Observable.fromCallable(() -> {
                            // Launch succeeded: record the pod creation on the task.
                            TaskStatus acceptedStatus = JobModel.newTaskStatus()
                                    .withState(TaskState.Accepted)
                                    .withReasonCode(TaskStatus.REASON_POD_CREATED)
                                    .withReasonMessage("Created pod in Kubernetes via KubeScheduler. Needs to be scheduled on a node.")
                                    .withTimestamp(titusRuntime.getClock().wallTime())
                                    .build();
                            Task podTask = task.toBuilder()
                                    .withTaskContext(CollectionsExt.copyAndAdd(task.getTaskContext(), TaskAttributes.TASK_ATTRIBUTES_POD_CREATED, "true"))
                                    .withStatus(acceptedStatus)
                                    .withStatusHistory(CollectionsExt.copyAndAdd(task.getStatusHistory(), task.getStatus()))
                                    .build();
                            Task versionedPodTask = VersionSuppliers.nextVersion(podTask, versionSupplier);
                            return ModelActionHolder.referenceAndRunning(
                                    TitusModelAction.newModelUpdate(self).taskUpdate(versionedPodTask)
                            );
                        }))
                        .onErrorReturn(error -> {
                            // Pod creation failed: finish the task, and let the retryer decide when to try again.
                            String reasonCode = runtime.getComputeProvider().resolveReasonCode(error);
                            TaskStatus failedStatus = JobModel.newTaskStatus()
                                    .withState(TaskState.Finished)
                                    .withReasonCode(reasonCode)
                                    .withReasonMessage("Failed to create pod: " + ExceptionExt.toMessageChain(error))
                                    .withTimestamp(titusRuntime.getClock().wallTime())
                                    .build();
                            Task failedTask = JobFunctions.changeTaskStatus(task, failedStatus);
                            Task versionedFailedTask = VersionSuppliers.nextVersion(failedTask, versionSupplier);

                            List<ModelActionHolder> updates = new ArrayList<>();
                            updates.add(ModelActionHolder.reference(
                                    attachRetryer(self, taskHolder, versionedFailedTask, callMetadata, configuration, titusRuntime)
                            ));
                            updates.add(ModelActionHolder.running(
                                    TitusModelAction.newModelUpdate(self).taskUpdate(versionedFailedTask)
                            ));
                            return updates;
                        });
            });
}
Also used : TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder)

Example 24 with EntityHolder

Use of com.netflix.titus.common.framework.reconciler.EntityHolder in the project titus-control-plane by Netflix.

From the class BasicTaskActions, method writeReferenceTaskToStore.

/**
 * Builds a change action that persists the reference model's copy of a task, and then mirrors it in the store
 * model.
 * <p>
 * The task is looked up in the engine's reference view. If it is missing, the inconsistency is reported via the
 * runtime's code invariants and the action completes without a model update. Persisting goes through
 * {@link JobStore#updateTask(Task)}, which assumes that the task record was created already.
 */
public static TitusChangeAction writeReferenceTaskToStore(JobStore titusStore,
                                                          ReconciliationEngine<JobManagerReconcilerEvent> engine,
                                                          String taskId,
                                                          CallMetadata callMetadata,
                                                          TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("writeReferenceTaskToStore")
            .trigger(V3JobOperations.Trigger.Reconciler)
            .id(taskId)
            .summary("Persisting task to the store")
            .callMetadata(callMetadata)
            .changeWithModelUpdate(self -> {
                Optional<EntityHolder> referenceHolder = engine.getReferenceView().findById(taskId);
                if (!referenceHolder.isPresent()) {
                    // Should never happen
                    titusRuntime.getCodeInvariants().inconsistent("Reference task with id %s not found.", taskId);
                    return Observable.empty();
                }
                Task referenceTask = referenceHolder.get().getEntity();

                // The store model is updated only after the write to the store has completed.
                return titusStore.updateTask(referenceTask).andThen(Observable.fromCallable(() ->
                        ModelActionHolder.store(TitusModelAction.newModelUpdate(self).taskUpdate(root -> {
                            EntityHolder persistedHolder = EntityHolder.newRoot(referenceTask.getId(), referenceTask);
                            return Pair.of(root.addChild(persistedHolder), persistedHolder);
                        }))
                ));
            });
}
Also used : Trigger(com.netflix.titus.api.jobmanager.service.V3JobOperations.Trigger) DateTimeExt(com.netflix.titus.common.util.DateTimeExt) JobModel(com.netflix.titus.api.jobmanager.model.job.JobModel) JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Observable(rx.Observable) Pair(com.netflix.titus.common.util.tuple.Pair) JobManagerConfiguration(com.netflix.titus.master.jobmanager.service.JobManagerConfiguration) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) ExceptionExt(com.netflix.titus.common.util.ExceptionExt) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) TaskRetryers(com.netflix.titus.master.jobmanager.service.common.action.TaskRetryers) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) 
VersionSuppliers(com.netflix.titus.master.jobmanager.service.VersionSuppliers) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 25 with EntityHolder

Use of com.netflix.titus.common.framework.reconciler.EntityHolder in the project titus-control-plane by Netflix.

From the class KillInitiatedActions, method reconcilerInitiatedAllTasksKillInitiated.

/**
 * Produces the change actions needed to terminate all active tasks of a job, capped at {@code concurrencyLimit}
 * actions per invocation. This method is used for internal state reconciliation.
 * <p>
 * Two groups of tasks are handled, in this order:
 * <ol>
 * <li>tasks in the reference model that are in the {@link TaskState#Accepted} state and have no counterpart in the
 * running model are moved directly to {@link TaskState#Finished} and written to the store;</li>
 * <li>tasks in the running model that are neither {@link TaskState#KillInitiated} nor {@link TaskState#Finished}
 * get a kill-initiated action.</li>
 * </ol>
 */
public static List<ChangeAction> reconcilerInitiatedAllTasksKillInitiated(ReconciliationEngine<JobManagerReconcilerEvent> engine,
                                                                          JobServiceRuntime runtime,
                                                                          JobStore jobStore,
                                                                          String reasonCode,
                                                                          String reason,
                                                                          int concurrencyLimit,
                                                                          VersionSupplier versionSupplier,
                                                                          TitusRuntime titusRuntime) {
    List<ChangeAction> actions = new ArrayList<>();
    EntityHolder runningView = engine.getRunningView();

    Set<String> idsInRunningModel = new HashSet<>();
    for (EntityHolder runningHolder : runningView.getChildren()) {
        Task runningTask = runningHolder.getEntity();
        idsInRunningModel.add(runningTask.getId());
    }

    // Immediately finish Accepted tasks, which are not yet in the running model.
    for (EntityHolder referenceHolder : engine.getReferenceView().getChildren()) {
        if (actions.size() >= concurrencyLimit) {
            return actions;
        }
        Task referenceTask = referenceHolder.getEntity();
        TaskState referenceState = referenceTask.getStatus().getState();
        boolean acceptedAndNotRunning = referenceState == TaskState.Accepted
                && !idsInRunningModel.contains(referenceTask.getId());
        if (!acceptedAndNotRunning) {
            continue;
        }
        actions.add(BasicTaskActions.updateTaskAndWriteItToStore(
                referenceTask.getId(),
                engine,
                taskRef -> JobFunctions.changeTaskStatus(taskRef, TaskState.Finished, reasonCode, reason, titusRuntime.getClock()),
                jobStore,
                V3JobOperations.Trigger.Reconciler,
                reason,
                versionSupplier,
                titusRuntime,
                JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason(reason).build()
        ));
    }

    // Move running tasks to KillInitiated state
    for (EntityHolder runningHolder : runningView.getChildren()) {
        if (actions.size() >= concurrencyLimit) {
            return actions;
        }
        Task runningTask = runningHolder.getEntity();
        TaskState runningState = runningTask.getStatus().getState();
        boolean alreadyTerminating = runningState == TaskState.KillInitiated || runningState == TaskState.Finished;
        if (alreadyTerminating) {
            continue;
        }
        actions.add(reconcilerInitiatedTaskKillInitiated(engine, runningTask, runtime, jobStore, versionSupplier, reasonCode, reason, titusRuntime));
    }
    return actions;
}
Also used : Completable(rx.Completable) JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) Callable(java.util.concurrent.Callable) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) ArrayList(java.util.ArrayList) Observable(rx.Observable) HashSet(java.util.HashSet) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) Job(com.netflix.titus.api.jobmanager.model.job.Job) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) 
VersionSuppliers(com.netflix.titus.master.jobmanager.service.VersionSuppliers) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) Task(com.netflix.titus.api.jobmanager.model.job.Task) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) HashSet(java.util.HashSet)

Aggregations

EntityHolder (com.netflix.titus.common.framework.reconciler.EntityHolder)31 ArrayList (java.util.ArrayList)17 Task (com.netflix.titus.api.jobmanager.model.job.Task)12 ChangeAction (com.netflix.titus.common.framework.reconciler.ChangeAction)12 ModelActionHolder (com.netflix.titus.common.framework.reconciler.ModelActionHolder)12 TitusChangeAction (com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction)12 List (java.util.List)10 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)7 TitusModelAction (com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction)7 Job (com.netflix.titus.api.jobmanager.model.job.Job)6 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)6 ServiceJobExt (com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt)6 ReconciliationEngine (com.netflix.titus.common.framework.reconciler.ReconciliationEngine)6 JobManagerReconcilerEvent (com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent)6 Test (org.junit.Test)6 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)5 JobStore (com.netflix.titus.api.jobmanager.store.JobStore)5 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)5 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)5 Optional (java.util.Optional)5