use of com.netflix.titus.common.framework.reconciler.ReconciliationEngine in project titus-control-plane by Netflix.
the class DefaultReconciliationFramework method changeReferenceModel.
/**
 * Schedules a single change action that spans several reconciliation engines.
 * <p>
 * The shared {@code multiEngineChangeAction} is applied once, and its result is fanned out (via
 * {@code ObservableExt.propagate}) to one engine-level {@link ChangeAction} per engine. Each engine-level
 * action is created by {@code engineChangeActionFactory}, which receives the engine root id and the stream of
 * model updates looked up under that root id.
 *
 * @param multiEngineChangeAction   the action whose result is keyed by engine root id
 * @param engineChangeActionFactory creates the per-engine change action from a root id and its model updates
 * @param rootEntityHolderIds       root ids of the participating engines; at least two are required, otherwise
 *                                  {@code Preconditions.checkArgument} rejects the call
 * @return an observable which, on subscription, queues the per-engine actions and merges their outcomes
 *         (errors are delayed by {@code mergeDelayError}). An unknown root id raises
 *         {@link IllegalArgumentException} from within the subscription callback.
 */
@Override
public Observable<Void> changeReferenceModel(MultiEngineChangeAction multiEngineChangeAction, BiFunction<String, Observable<List<ModelActionHolder>>, ChangeAction> engineChangeActionFactory, String... rootEntityHolderIds) {
    Preconditions.checkArgument(rootEntityHolderIds.length > 1, "Change action for multiple engines requested, but %s root id holders provided", rootEntityHolderIds.length);
    return Observable.create(emitter -> {
        // Resolve every engine first, so that an unknown root id aborts before the shared action is applied.
        List<ReconciliationEngine<EVENT>> targetEngines = new ArrayList<>();
        for (String holderId : rootEntityHolderIds) {
            targetEngines.add(findEngineByRootId(holderId).orElseThrow(() -> new IllegalArgumentException("Reconciliation engine not found: rootId=" + holderId)));
        }

        // One copy of the shared result stream per engine.
        List<Observable<Map<String, List<ModelActionHolder>>>> sharedResults = ObservableExt.propagate(multiEngineChangeAction.apply(), targetEngines.size());

        List<Observable<Void>> pendingChanges = new ArrayList<>();
        int position = 0;
        for (ReconciliationEngine<EVENT> targetEngine : targetEngines) {
            String engineRootId = targetEngine.getReferenceView().getId();
            // Each engine only sees the model updates registered under its own root id.
            Observable<List<ModelActionHolder>> engineUpdates = sharedResults.get(position).map(updatesByRootId -> updatesByRootId.get(engineRootId));
            ChangeAction perEngineAction = engineChangeActionFactory.apply(engineRootId, engineUpdates);
            pendingChanges.add(targetEngine.changeReferenceModel(perEngineAction));
            position++;
        }

        // The subscription below is guarded by a lock so that two multi-engine actions touching the same
        // (or overlapping) engines cannot enqueue their per-engine parts in an interleaved order, which could
        // deadlock. Illustration with engineA and engineB:
        //   1. multi-engine action M1 (engineA + engineB) is scheduled, and M1/engineA is queued
        //   2. multi-engine action M2 (engineA + engineB) is scheduled, and M2/engineB is queued
        //   3. M1/engineB is queued, followed by M2/engineA
        // M1 can only run when M1/engineA and M1/engineB are both at the head of their queues, yet M2/engineB
        // sits in front of M1/engineB, while M1/engineA sits in front of M2/engineA. Neither can progress.
        // Regular, single-engine change actions do not take part in this problem and can be ignored here.
        Subscription mergedSubscription;
        synchronized (multiEngineChangeLock) {
            mergedSubscription = Observable.mergeDelayError(pendingChanges).subscribe(emitter::onNext, emitter::onError, emitter::onCompleted);
        }
        emitter.setSubscription(mergedSubscription);
    }, Emitter.BackpressureMode.NONE);
}
use of com.netflix.titus.common.framework.reconciler.ReconciliationEngine in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method moveServiceTask.
/**
 * Moves a task from one service job to another by scheduling a multi-engine change action.
 * <p>
 * All validation runs inside {@code Observable.defer}, i.e. when the returned observable is subscribed to.
 * The checks, in order: the task must belong to a known engine, the owning job must be a service job, it must
 * match {@code sourceJobId}, it must differ from {@code targetJobId}, the target job must exist and be a
 * service job, and (only when move-task validation is enabled) both jobs must be compatible.
 *
 * @param sourceJobId  id of the job expected to own the task
 * @param targetJobId  id of the job receiving the task
 * @param taskId       id of the task to move
 * @param callMetadata caller information attached to the generated change actions
 * @return the observable returned by the reconciliation framework for the multi-engine change
 */
@Override
public Observable<Void> moveServiceTask(String sourceJobId, String targetJobId, String taskId, CallMetadata callMetadata) {
    return Observable.defer(() -> {
        // Source side: locate the engine that currently owns the task.
        ReconciliationEngine<JobManagerReconcilerEvent> engineFrom = reconciliationFramework.findEngineByChildId(taskId)
                .orElseThrow(() -> JobManagerException.taskNotFound(taskId))
                .getLeft();
        Job<ServiceJobExt> jobFrom = engineFrom.getReferenceView().getEntity();
        if (!JobFunctions.isServiceJob(jobFrom)) {
            throw JobManagerException.notServiceJob(jobFrom.getId());
        }
        if (!jobFrom.getId().equals(sourceJobId)) {
            throw JobManagerException.taskJobMismatch(taskId, sourceJobId);
        }
        if (jobFrom.getId().equals(targetJobId)) {
            throw JobManagerException.sameJobs(jobFrom.getId());
        }

        // Target side: the destination job must exist and be a service job as well.
        ReconciliationEngine<JobManagerReconcilerEvent> engineTo = reconciliationFramework.findEngineByRootId(targetJobId)
                .orElseThrow(() -> JobManagerException.jobNotFound(targetJobId));
        Job<ServiceJobExt> jobTo = engineTo.getReferenceView().getEntity();
        if (!JobFunctions.isServiceJob(jobTo)) {
            throw JobManagerException.notServiceJob(jobTo.getId());
        }

        // Compatibility is enforced only when the feature flag is on; the diff report feeds the error message.
        JobCompatibility compatibility = JobCompatibility.of(jobFrom, jobTo);
        boolean validationEnabled = featureActivationConfiguration.isMoveTaskValidationEnabled();
        if (validationEnabled && !compatibility.isCompatible()) {
            Optional<String> diffReport = ProtobufExt.diffReport(
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorFrom()),
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorTo())
            );
            throw JobManagerException.notCompatible(jobFrom, jobTo, diffReport.orElse(""));
        }

        MoveTaskBetweenJobsAction moveAction = new MoveTaskBetweenJobsAction(engineFrom, engineTo, taskId, store, callMetadata, versionSupplier);
        return reconciliationFramework.changeReferenceModel(
                moveAction,
                (rootId, modelUpdatesObservable) -> {
                    // The same factory serves both engines; name and summary depend on which side is asked for.
                    boolean receivingSide = targetJobId.equals(rootId);
                    String name = receivingSide ? "moveTask(to)" : "moveTask(from)";
                    String summary = receivingSide
                            ? "Moving a task to this job from job " + jobFrom.getId()
                            : "Moving a task out of this job to job " + jobTo.getId();
                    return new TitusChangeAction(Trigger.API, rootId, null, name, summary, callMetadata) {
                        @Override
                        public Observable<List<ModelActionHolder>> apply() {
                            return modelUpdatesObservable;
                        }
                    };
                },
                jobFrom.getId(),
                jobTo.getId()
        );
    });
}
use of com.netflix.titus.common.framework.reconciler.ReconciliationEngine in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method updateTask.
/**
 * Applies {@code changeFunction} to a task through the reconciliation engine that owns it.
 *
 * @param taskId         id of the task to update
 * @param changeFunction function handed to {@code BasicTaskActions.updateTaskInRunningModel}
 * @param trigger        what initiated the change
 * @param reason         free-form reason recorded with the change action
 * @param callMetadata   caller information attached to the change action
 * @return a {@link Completable} for the scheduled change, or one failing with
 *         {@code JobManagerException.taskNotFound} when no engine owns the task
 */
@Override
public Completable updateTask(String taskId, Function<Task, Optional<Task>> changeFunction, Trigger trigger, String reason, CallMetadata callMetadata) {
    return reconciliationFramework.findEngineByChildId(taskId)
            .map(Pair::getLeft)
            .map(owningEngine -> {
                TitusChangeAction updateAction = BasicTaskActions.updateTaskInRunningModel(
                        taskId, trigger, jobManagerConfiguration, owningEngine, changeFunction, reason, versionSupplier, titusRuntime, callMetadata
                );
                return owningEngine.changeReferenceModel(updateAction, taskId).toCompletable();
            })
            .orElseGet(() -> Completable.error(JobManagerException.taskNotFound(taskId)));
}
use of com.netflix.titus.common.framework.reconciler.ReconciliationEngine in project titus-control-plane by Netflix.
the class BasicTaskActions method writeReferenceTaskToStore.
/**
 * Builds a change action that persists the current reference-model task to the store and, once the write
 * completes, emits a store-model update adding that task under the store root.
 * <p>
 * The write goes through {@link JobStore#updateTask(Task)}, which assumes that the task record was created
 * already. If the task is missing from the reference view, an invariant violation is reported and no model
 * update is emitted.
 * <p>
 * NOTE(review): earlier documentation stated that completed tasks are also removed from the scheduling
 * service; nothing in this method does that - confirm whether that happens elsewhere.
 *
 * @param titusStore   store receiving the task record
 * @param engine       reconciliation engine whose reference view is read
 * @param taskId       id of the task to persist
 * @param callMetadata caller information attached to the change action
 * @param titusRuntime used to report the "task not found" invariant violation
 * @return the change action, triggered as {@code V3JobOperations.Trigger.Reconciler}
 */
public static TitusChangeAction writeReferenceTaskToStore(JobStore titusStore, ReconciliationEngine<JobManagerReconcilerEvent> engine, String taskId, CallMetadata callMetadata, TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("writeReferenceTaskToStore")
            .trigger(V3JobOperations.Trigger.Reconciler)
            .id(taskId)
            .summary("Persisting task to the store")
            .callMetadata(callMetadata)
            .changeWithModelUpdate(self -> {
                Optional<EntityHolder> referenceHolder = engine.getReferenceView().findById(taskId);
                if (referenceHolder.isPresent()) {
                    Task taskToPersist = referenceHolder.get().getEntity();
                    // The store-model update is produced only after the store write has completed.
                    return titusStore.updateTask(taskToPersist).andThen(Observable.fromCallable(() ->
                            ModelActionHolder.store(
                                    TitusModelAction.newModelUpdate(self).taskUpdate(storeRoot -> {
                                        EntityHolder persistedHolder = EntityHolder.newRoot(taskToPersist.getId(), taskToPersist);
                                        return Pair.of(storeRoot.addChild(persistedHolder), persistedHolder);
                                    })
                            )
                    ));
                }
                // Should never happen
                titusRuntime.getCodeInvariants().inconsistent("Reference task with id %s not found.", taskId);
                return Observable.empty();
            });
}
use of com.netflix.titus.common.framework.reconciler.ReconciliationEngine in project titus-control-plane by Netflix.
the class KillInitiatedActions method reconcilerInitiatedAllTasksKillInitiated.
/**
 * Produces the change actions needed to terminate all active tasks of a job during internal state
 * reconciliation, capped at {@code concurrencyLimit} actions per invocation.
 * <p>
 * Two groups of tasks are handled, in this order:
 * <ol>
 * <li>tasks in {@link TaskState#Accepted} state that exist in the reference view but not in the running view
 * are moved straight to {@link TaskState#Finished} and written to the store;</li>
 * <li>tasks in the running view that are neither {@link TaskState#KillInitiated} nor
 * {@link TaskState#Finished} get a kill action from {@code reconcilerInitiatedTaskKillInitiated}.</li>
 * </ol>
 *
 * @param concurrencyLimit maximum number of change actions returned
 * @return the list of change actions, possibly empty
 */
public static List<ChangeAction> reconcilerInitiatedAllTasksKillInitiated(ReconciliationEngine<JobManagerReconcilerEvent> engine, JobServiceRuntime runtime, JobStore jobStore, String reasonCode, String reason, int concurrencyLimit, VersionSupplier versionSupplier, TitusRuntime titusRuntime) {
    List<ChangeAction> killActions = new ArrayList<>();
    EntityHolder runningRoot = engine.getRunningView();

    // Ids of all tasks already present in the running model.
    Set<String> idsInRunningModel = new HashSet<>();
    for (EntityHolder runningHolder : runningRoot.getChildren()) {
        Task runningTask = runningHolder.getEntity();
        idsInRunningModel.add(runningTask.getId());
    }

    // Accepted tasks that never reached the running model are finished right away.
    for (EntityHolder referenceHolder : engine.getReferenceView().getChildren()) {
        if (killActions.size() >= concurrencyLimit) {
            return killActions;
        }
        Task referenceTask = referenceHolder.getEntity();
        boolean accepted = referenceTask.getStatus().getState() == TaskState.Accepted;
        if (!accepted || idsInRunningModel.contains(referenceTask.getId())) {
            continue;
        }
        killActions.add(BasicTaskActions.updateTaskAndWriteItToStore(
                referenceTask.getId(),
                engine,
                taskRef -> JobFunctions.changeTaskStatus(taskRef, TaskState.Finished, reasonCode, reason, titusRuntime.getClock()),
                jobStore,
                V3JobOperations.Trigger.Reconciler,
                reason,
                versionSupplier,
                titusRuntime,
                JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason(reason).build()
        ));
    }

    // Every remaining running task that is not yet being killed (or finished) gets a kill action.
    for (EntityHolder runningHolder : runningRoot.getChildren()) {
        if (killActions.size() >= concurrencyLimit) {
            return killActions;
        }
        Task runningTask = runningHolder.getEntity();
        TaskState currentState = runningTask.getStatus().getState();
        if (currentState == TaskState.KillInitiated || currentState == TaskState.Finished) {
            continue;
        }
        killActions.add(reconcilerInitiatedTaskKillInitiated(engine, runningTask, runtime, jobStore, versionSupplier, reasonCode, reason, titusRuntime));
    }
    return killActions;
}
Aggregations