use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.
the class BasicTaskActions method updateTaskInRunningModel.
/**
 * Applies {@code changeFunction} to a task and pushes the result to both the reference and the running model.
 * <p>
 * The new task version is obtained from {@code versionSupplier}. When the resulting task is in the
 * {@link TaskState#Finished} state, the reference model update is produced by
 * {@code attachRetryer}, which adds retry delay information to the task and its entity holder
 * (see {@link TaskRetryers}). The running model always receives a plain task update.
 * <p>
 * No model updates are emitted (an empty list is returned) when the task holder cannot be resolved, or when
 * {@code changeFunction} returns an empty {@link Optional}.
 *
 * @param taskId          id of the task to update
 * @param trigger         trigger recorded on the change action
 * @param configuration   job manager configuration, source of the retry interval settings
 * @param engine          reconciliation engine in which the task holder is looked up
 * @param changeFunction  computes the updated task; an empty result means "no change"
 * @param reason          summary recorded on the change action
 * @param versionSupplier supplies the next version for the updated task
 * @param titusRuntime    runtime used for the task holder lookup and the retryer clock
 * @param callMetadata    metadata of the caller that initiated the change
 * @return change action that applies the model updates described above
 */
public static TitusChangeAction updateTaskInRunningModel(String taskId, Trigger trigger, JobManagerConfiguration configuration, ReconciliationEngine<JobManagerReconcilerEvent> engine, Function<Task, Optional<Task>> changeFunction, String reason, VersionSupplier versionSupplier, TitusRuntime titusRuntime, CallMetadata callMetadata) {
    return TitusChangeAction.newAction("updateTaskInRunningModel")
            .id(taskId)
            .trigger(trigger)
            .summary(reason)
            .callMetadata(callMetadata)
            .applyModelUpdates(self -> {
                EntityHolder currentHolder = JobEntityHolders.expectTaskHolder(engine, taskId, titusRuntime).orElse(null);
                if (currentHolder == null) {
                    return Collections.emptyList();
                }

                Task currentTask = currentHolder.getEntity();
                Optional<Task> changedTask = changeFunction.apply(currentTask);
                if (!changedTask.isPresent()) {
                    return Collections.emptyList();
                }
                Task versionedTask = VersionSuppliers.nextVersion(changedTask.get(), versionSupplier);

                // Reference and running models are handled separately: only the reference model
                // gets the retry attributes, and only for tasks that reached the Finished state.
                boolean finished = versionedTask.getStatus().getState() == TaskState.Finished;
                TitusModelAction referenceUpdate = finished
                        ? attachRetryer(self, currentHolder, versionedTask, callMetadata, configuration, titusRuntime)
                        : TitusModelAction.newModelUpdate(self).taskUpdate(versionedTask);
                TitusModelAction runningUpdate = TitusModelAction.newModelUpdate(self).taskUpdate(versionedTask);

                List<ModelActionHolder> updates = new ArrayList<>();
                updates.add(ModelActionHolder.reference(referenceUpdate));
                updates.add(ModelActionHolder.running(runningUpdate));
                return updates;
            });
}
use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.
the class BasicTaskActions method attachRetryer.
/**
 * Builds a model action that stores a task together with its retry delay.
 * <p>
 * The delay is computed by {@link TaskRetryers#getCurrentRetryerDelayMs} from the current task holder, the
 * configured minimum retry interval and retryer reset time, and the runtime clock. Its human-readable form is
 * added to the task context under {@link TaskAttributes#TASK_ATTRIBUTES_RETRY_DELAY}, and the raw millisecond
 * value is attached to the holder as the {@link TaskRetryers#ATTR_TASK_RETRY_DELAY_MS} tag.
 *
 * @param self          builder of the enclosing change action
 * @param taskHolder    current holder of the task
 * @param updatedTask   new task state to store in the holder
 * @param callMetadata  metadata of the caller that initiated the change
 * @param configuration source of the retry interval settings
 * @param titusRuntime  provides the clock used for the delay computation
 * @return model action adding the updated task holder
 */
private static TitusModelAction attachRetryer(TitusChangeAction.Builder self, EntityHolder taskHolder, Task updatedTask, CallMetadata callMetadata, JobManagerConfiguration configuration, TitusRuntime titusRuntime) {
    long delayMs = TaskRetryers.getCurrentRetryerDelayMs(
            taskHolder,
            configuration.getMinRetryIntervalMs(),
            configuration.getTaskRetryerResetTimeMs(),
            titusRuntime.getClock()
    );
    String delayText = DateTimeExt.toTimeUnitString(delayMs);

    // The task carries the formatted delay, the holder tag carries the numeric value.
    Task taskWithDelay = updatedTask.toBuilder()
            .addToTaskContext(TaskAttributes.TASK_ATTRIBUTES_RETRY_DELAY, delayText)
            .build();
    EntityHolder holderWithDelay = taskHolder
            .setEntity(taskWithDelay)
            .addTag(TaskRetryers.ATTR_TASK_RETRY_DELAY_MS, delayMs);

    return TitusModelAction.newModelUpdate(self)
            .summary("Setting retry delay on task in Finished state: %s", delayText)
            .callMetadata(callMetadata)
            .addTaskHolder(holderWithDelay);
}
use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.
the class BasicTaskActions method launchTaskInKube.
/**
 * Creates a pod for a task by asking the compute provider to launch it.
 * <p>
 * On success the task is moved to the {@link TaskState#Accepted} state with the
 * {@link TaskStatus#REASON_POD_CREATED} reason code, the {@link TaskAttributes#TASK_ATTRIBUTES_POD_CREATED}
 * context attribute is set to {@code "true"}, and the same update is applied to the reference and running models.
 * <p>
 * On failure the task is moved to the {@link TaskState#Finished} state with a reason code resolved by the
 * compute provider; the reference model update additionally carries the retry delay (see {@code attachRetryer}).
 * <p>
 * If the task holder cannot be resolved, no launch is attempted and an empty list of model updates is emitted.
 *
 * @param configuration   source of the retry interval settings used on the failure path
 * @param runtime         gives access to the compute provider
 * @param engine          reconciliation engine in which the task holder is looked up
 * @param job             job owning the task
 * @param task            task to launch
 * @param callMetadata    metadata of the caller that initiated the change
 * @param versionSupplier supplies the next version for the updated task
 * @param titusRuntime    provides the clock and is used for the task holder lookup
 * @return change action that launches the task and emits the resulting model updates
 */
public static TitusChangeAction launchTaskInKube(JobManagerConfiguration configuration, JobServiceRuntime runtime, ReconciliationEngine<JobManagerReconcilerEvent> engine, Job<?> job, Task task, CallMetadata callMetadata, VersionSupplier versionSupplier, TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("launchTaskInKube")
            .task(task)
            .trigger(V3JobOperations.Trigger.Reconciler)
            .summary("Adding task to Kube")
            .callMetadata(callMetadata)
            .changeWithModelUpdates(self -> {
                EntityHolder taskHolder = JobEntityHolders.expectTaskHolder(engine, task.getId(), titusRuntime).orElse(null);
                if (taskHolder == null) {
                    // This should never happen.
                    return Observable.just(Collections.emptyList());
                }

                return ReactorExt.toCompletable(runtime.getComputeProvider().launchTask(job, task).then())
                        .andThen(Observable.fromCallable(() -> {
                            // Pod object was created; the task now waits to be scheduled on a node.
                            TaskStatus acceptedStatus = JobModel.newTaskStatus()
                                    .withState(TaskState.Accepted)
                                    .withReasonCode(TaskStatus.REASON_POD_CREATED)
                                    .withReasonMessage("Created pod in Kubernetes via KubeScheduler. Needs to be scheduled on a node.")
                                    .withTimestamp(titusRuntime.getClock().wallTime())
                                    .build();
                            Task podCreatedTask = task.toBuilder()
                                    .withTaskContext(CollectionsExt.copyAndAdd(task.getTaskContext(), TaskAttributes.TASK_ATTRIBUTES_POD_CREATED, "true"))
                                    .withStatus(acceptedStatus)
                                    .withStatusHistory(CollectionsExt.copyAndAdd(task.getStatusHistory(), task.getStatus()))
                                    .build();
                            Task versionedTask = VersionSuppliers.nextVersion(podCreatedTask, versionSupplier);
                            return ModelActionHolder.referenceAndRunning(
                                    TitusModelAction.newModelUpdate(self).taskUpdate(versionedTask)
                            );
                        }))
                        .onErrorReturn(error -> {
                            // Pod creation failed: move the task to the Finished state.
                            String failureCode = runtime.getComputeProvider().resolveReasonCode(error);
                            TaskStatus failedStatus = JobModel.newTaskStatus()
                                    .withState(TaskState.Finished)
                                    .withReasonCode(failureCode)
                                    .withReasonMessage("Failed to create pod: " + ExceptionExt.toMessageChain(error))
                                    .withTimestamp(titusRuntime.getClock().wallTime())
                                    .build();
                            Task failedTask = VersionSuppliers.nextVersion(JobFunctions.changeTaskStatus(task, failedStatus), versionSupplier);

                            // Only the reference model gets the retry delay information.
                            TitusModelAction referenceUpdate = attachRetryer(self, taskHolder, failedTask, callMetadata, configuration, titusRuntime);
                            TitusModelAction runningUpdate = TitusModelAction.newModelUpdate(self).taskUpdate(failedTask);

                            List<ModelActionHolder> updates = new ArrayList<>();
                            updates.add(ModelActionHolder.reference(referenceUpdate));
                            updates.add(ModelActionHolder.running(runningUpdate));
                            return updates;
                        });
            });
}
use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.
the class BasicTaskActions method writeReferenceTaskToStore.
/**
 * Persists the current reference-model version of a task, and then mirrors it into the store model.
 * <p>
 * The task is read from the engine's reference view and written with {@link JobStore#updateTask(Task)}, which
 * assumes that the task record was created already. Once the write completes, a store model update is emitted
 * that adds a fresh holder with the persisted task under the store root.
 * <p>
 * If the reference view has no holder for {@code taskId}, the inconsistency is reported via the runtime's code
 * invariants and nothing is written or emitted.
 *
 * @param titusStore   store to which the task is written
 * @param engine       reconciliation engine providing the reference view
 * @param taskId       id of the task to persist
 * @param callMetadata metadata of the caller that initiated the change
 * @param titusRuntime provides the code invariants used for reporting a missing task
 * @return change action that writes the task and emits the store model update
 */
public static TitusChangeAction writeReferenceTaskToStore(JobStore titusStore, ReconciliationEngine<JobManagerReconcilerEvent> engine, String taskId, CallMetadata callMetadata, TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("writeReferenceTaskToStore")
            .trigger(V3JobOperations.Trigger.Reconciler)
            .id(taskId)
            .summary("Persisting task to the store")
            .callMetadata(callMetadata)
            .changeWithModelUpdate(self -> {
                EntityHolder referenceHolder = engine.getReferenceView().findById(taskId).orElse(null);
                if (referenceHolder == null) {
                    // Should never happen
                    titusRuntime.getCodeInvariants().inconsistent("Reference task with id %s not found.", taskId);
                    return Observable.empty();
                }

                Task persistedTask = referenceHolder.getEntity();
                return titusStore.updateTask(persistedTask).andThen(Observable.fromCallable(() -> {
                    TitusModelAction storeUpdate = TitusModelAction.newModelUpdate(self).taskUpdate(root -> {
                        // Build a new holder for the persisted task and attach it to the store root.
                        EntityHolder persistedHolder = EntityHolder.newRoot(persistedTask.getId(), persistedTask);
                        EntityHolder updatedRoot = root.addChild(persistedHolder);
                        return Pair.of(updatedRoot, persistedHolder);
                    });
                    return ModelActionHolder.store(storeUpdate);
                }));
            });
}
use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.
the class KillInitiatedActions method reconcilerInitiatedAllTasksKillInitiated.
/**
 * Builds the change actions needed to terminate all active tasks. This method is used for internal state
 * reconciliation.
 * <p>
 * Two groups of tasks are handled, in this order:
 * <ol>
 * <li>tasks in the reference view that are in the {@link TaskState#Accepted} state and have no counterpart in
 * the running view are moved directly to {@link TaskState#Finished} and written to the store;</li>
 * <li>tasks in the running view that are neither {@link TaskState#KillInitiated} nor
 * {@link TaskState#Finished} get a kill-initiated action created by
 * {@code reconcilerInitiatedTaskKillInitiated}.</li>
 * </ol>
 * Collection stops as soon as {@code concurrencyLimit} actions have been gathered.
 *
 * @param engine           reconciliation engine providing the reference and running views
 * @param runtime          job service runtime passed on to the kill-initiated action
 * @param jobStore         store used by the created actions
 * @param reasonCode       reason code recorded on the task status changes
 * @param reason           reason message; also used as the call reason and the action summary
 * @param concurrencyLimit maximum number of actions to return
 * @param versionSupplier  supplies versions for the updated tasks
 * @param titusRuntime     provides the clock used for the status changes
 * @return list of change actions, at most {@code concurrencyLimit} long when the limit is positive
 */
public static List<ChangeAction> reconcilerInitiatedAllTasksKillInitiated(ReconciliationEngine<JobManagerReconcilerEvent> engine, JobServiceRuntime runtime, JobStore jobStore, String reasonCode, String reason, int concurrencyLimit, VersionSupplier versionSupplier, TitusRuntime titusRuntime) {
    List<ChangeAction> actions = new ArrayList<>();
    EntityHolder runningView = engine.getRunningView();

    Set<String> idsInRunningModel = new HashSet<>();
    for (EntityHolder runningHolder : runningView.getChildren()) {
        Task runningTask = runningHolder.getEntity();
        idsInRunningModel.add(runningTask.getId());
    }

    // Immediately finish Accepted tasks, which are not yet in the running model.
    for (EntityHolder referenceHolder : engine.getReferenceView().getChildren()) {
        if (result_limitReached(actions, concurrencyLimit)) {
            return actions;
        }
        Task referenceTask = referenceHolder.getEntity();
        if (referenceTask.getStatus().getState() != TaskState.Accepted || idsInRunningModel.contains(referenceTask.getId())) {
            continue;
        }
        CallMetadata reconcilerCallMetadata = JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason(reason).build();
        actions.add(BasicTaskActions.updateTaskAndWriteItToStore(
                referenceTask.getId(),
                engine,
                taskRef -> JobFunctions.changeTaskStatus(taskRef, TaskState.Finished, reasonCode, reason, titusRuntime.getClock()),
                jobStore,
                V3JobOperations.Trigger.Reconciler,
                reason,
                versionSupplier,
                titusRuntime,
                reconcilerCallMetadata
        ));
    }

    // Move running tasks to KillInitiated state
    for (EntityHolder runningHolder : runningView.getChildren()) {
        if (result_limitReached(actions, concurrencyLimit)) {
            return actions;
        }
        Task runningTask = runningHolder.getEntity();
        TaskState runningState = runningTask.getStatus().getState();
        if (runningState == TaskState.KillInitiated || runningState == TaskState.Finished) {
            continue;
        }
        actions.add(reconcilerInitiatedTaskKillInitiated(engine, runningTask, runtime, jobStore, versionSupplier, reasonCode, reason, titusRuntime));
    }
    return actions;
}

/**
 * Tells whether the number of collected actions has reached the concurrency limit.
 */
private static boolean result_limitReached(List<ChangeAction> actions, int concurrencyLimit) {
    return actions.size() >= concurrencyLimit;
}
Aggregations