use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.
the class BatchDifferenceResolver method applyRuntime.
/**
 * Computes reconciliation actions for a batch job by comparing the reference view with the running model.
 * <ul>
 * <li>Job in {@code KillInitiated} state: returns the task kill actions produced by
 * {@code KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated}, capped by the configured concurrent
 * reconciler store update limit. If there are none, returns the result of the task state timeout check.</li>
 * <li>Job in {@code Finished} state: returns an empty list.</li>
 * <li>Otherwise: collects job size inconsistency actions; only if there are none, adds actions for missing
 * running tasks; the task state timeout actions are always appended last.</li>
 * </ul>
 */
private List<ChangeAction> applyRuntime(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder runningModel, EntityHolder storeModel, AtomicInteger allowedNewTasks) {
    EntityHolder jobHolder = refJobView.getJobHolder();
    BatchJobView runningView = new BatchJobView(runningModel);

    if (DifferenceResolverUtils.hasJobState(jobHolder, JobState.KillInitiated)) {
        int killLimit = configuration.getConcurrentReconcilerStoreUpdateLimit();
        List<ChangeAction> killActions = KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated(
                engine, runtime, jobStore, TaskStatus.REASON_TASK_KILLED,
                "Killing task as its job is in KillInitiated state",
                killLimit, versionSupplier, titusRuntime
        );
        if (!killActions.isEmpty()) {
            return killActions;
        }
        // Nothing left to kill in this pass; fall back to the task state timeout check.
        return DifferenceResolverUtils.findTaskStateTimeouts(engine, runningView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime);
    }

    if (DifferenceResolverUtils.hasJobState(jobHolder, JobState.Finished)) {
        return Collections.emptyList();
    }

    List<ChangeAction> result = new ArrayList<>();
    List<ChangeAction> sizeAdjustments = findJobSizeInconsistencies(refJobView, storeModel, allowedNewTasks);
    result.addAll(sizeAdjustments);
    // Missing running tasks are only looked for when the job size itself needs no adjustment.
    if (sizeAdjustments.isEmpty()) {
        result.addAll(findMissingRunningTasks(engine, refJobView, runningView));
    }
    result.addAll(DifferenceResolverUtils.findTaskStateTimeouts(engine, runningView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime));
    return result;
}
use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.
the class ServiceDifferenceResolver method applyRuntime.
/**
 * Computes reconciliation actions for a service job by comparing the reference view with the running model.
 * <ul>
 * <li>Job in {@code KillInitiated} state: requests task kill actions, capped by the current value of
 * {@code allowedTaskKills}. If any are produced, {@code allowedTaskKills} is reduced by their count and they
 * are returned; otherwise the result of the task state timeout check is returned.</li>
 * <li>Job in {@code Finished} state: returns an empty list.</li>
 * <li>Otherwise: collects job size inconsistency actions; only if there are none, adds actions for missing
 * running tasks; the task state timeout actions are always appended last.</li>
 * </ul>
 */
private List<ChangeAction> applyRuntime(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, EntityHolder runningModel, EntityHolder storeModel, AtomicInteger allowedNewTasks, AtomicInteger allowedTaskKills) {
    EntityHolder jobHolder = refJobView.getJobHolder();
    ServiceJobView runningView = new ServiceJobView(runningModel);

    if (hasJobState(jobHolder, JobState.KillInitiated)) {
        int killBudget = allowedTaskKills.get();
        List<ChangeAction> killActions = KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated(
                engine, runtime, jobStore, TaskStatus.REASON_TASK_KILLED,
                "Killing task as its job is in KillInitiated state",
                killBudget, versionSupplier, titusRuntime
        );
        if (!killActions.isEmpty()) {
            // Charge the kills issued in this pass against the shared kill budget.
            allowedTaskKills.set(allowedTaskKills.get() - killActions.size());
            return killActions;
        }
        return findTaskStateTimeouts(engine, runningView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime);
    }

    if (hasJobState(jobHolder, JobState.Finished)) {
        return Collections.emptyList();
    }

    List<ChangeAction> result = new ArrayList<>();
    List<ChangeAction> sizeAdjustments = findJobSizeInconsistencies(engine, refJobView, storeModel, allowedNewTasks, allowedTaskKills);
    result.addAll(sizeAdjustments);
    // Missing running tasks are only looked for when the job size itself needs no adjustment.
    if (sizeAdjustments.isEmpty()) {
        result.addAll(findMissingRunningTasks(engine, refJobView, runningView));
    }
    result.addAll(findTaskStateTimeouts(engine, runningView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime));
    return result;
}
use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.
the class BatchDifferenceResolver method apply.
/**
 * Runs one reconciliation pass for a batch job.
 * <p>
 * The budget for new tasks is the configured active-not-started task limit minus the number of tasks currently
 * counted as active and not started, never below zero. Store actions are collected first, then runtime actions
 * (both share the same budget counter). Only when neither step produces an action is the completed-job removal
 * check consulted.
 */
@Override
public List<ChangeAction> apply(ReconciliationEngine<JobManagerReconcilerEvent> engine) {
    BatchJobView refJobView = new BatchJobView(engine.getReferenceView());
    EntityHolder storeModel = engine.getStoreView();

    int notStartedCount = DifferenceResolverUtils.countActiveNotStartedTasks(refJobView.getJobHolder(), engine.getRunningView());
    int newTaskBudget = Math.max(0, configuration.getActiveNotStartedTasksLimit() - notStartedCount);
    AtomicInteger allowedNewTasks = new AtomicInteger(newTaskBudget);

    List<ChangeAction> result = new ArrayList<>();
    result.addAll(applyStore(engine, refJobView, storeModel, allowedNewTasks));
    result.addAll(applyRuntime(engine, refJobView, engine.getRunningView(), storeModel, allowedNewTasks));

    if (result.isEmpty()) {
        result.addAll(removeCompletedJob(refJobView, engine.getReferenceView(), storeModel, jobStore, versionSupplier));
    }
    return result;
}
use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.
the class BatchDifferenceResolver method applyStore.
/**
 * Computes the actions needed to bring the store model in line with the reference model.
 * <p>
 * Returns an empty list when {@code storeWriteRetryInterceptor.executionLimits} rejects the store job.
 * Otherwise:
 * <ul>
 * <li>a job store update is added when the reference job entity differs from the store job entity;</li>
 * <li>for each reference task that has no equivalent counterpart in the store, a store write action is added;</li>
 * <li>for each reference task that is in sync with the store and is eligible for retry right now, a new task
 * action is created, if one is available.</li>
 * </ul>
 * A task is eligible for retry only when the job is not in {@code KillInitiated} state, the retry check passes,
 * and {@code allowedNewTasks} is still positive. Every eligible task consumes one unit of {@code allowedNewTasks},
 * whether its retry happens now or is delayed.
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }

    EntityHolder refJobHolder = refJobView.getJobHolder();
    Job<BatchJobExt> refJob = refJobHolder.getEntity();

    List<ChangeAction> result = new ArrayList<>();
    if (!refJob.equals(storeJob.getEntity())) {
        result.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }

    boolean jobTerminating = refJob.getStatus().getState() == JobState.KillInitiated;
    for (EntityHolder refTaskHolder : refJobHolder.getChildren()) {
        Optional<EntityHolder> storeTaskHolder = storeJob.findById(refTaskHolder.getId());
        boolean inSync = storeTaskHolder.isPresent() && DifferenceResolverUtils.areEquivalent(storeTaskHolder.get(), refTaskHolder);
        boolean retryEligible = !jobTerminating
                && DifferenceResolverUtils.shouldRetry(refJob, refTaskHolder.getEntity())
                && allowedNewTasks.get() > 0;

        if (!inSync) {
            // The store is behind the reference model for this task: persist the reference state.
            Task refTask = refTaskHolder.getEntity();
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            result.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, refTask.getId(), callMetadata, titusRuntime)));
        } else {
            BatchJobTask storeTask = storeTaskHolder.get().getEntity();
            if (retryEligible && TaskRetryers.shouldRetryNow(refTaskHolder, clock)) {
                logger.info("Retrying task: oldTaskId={}, index={}", refTaskHolder.getId(), storeTask.getIndex());
                createNewTaskAction(refJobView, storeTask.getIndex(), Optional.of(refTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(result::add);
            }
        }

        // Immediate and delayed retries both count against the new task budget.
        if (retryEligible) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return result;
}
use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.
the class KillInitiatedActions method reconcilerInitiatedAllTasksKillInitiated.
/**
 * Builds the reconciler-initiated actions that wind down the tasks of a job. Used for internal state
 * reconciliation.
 * <p>
 * Two passes are made, in this order:
 * <ol>
 * <li>Tasks in the reference view that are in {@code Accepted} state and whose id is absent from the running
 * view get an action that moves them directly to {@link TaskState#Finished} and writes them to the store.</li>
 * <li>Tasks in the running view that are neither {@link TaskState#KillInitiated} nor {@link TaskState#Finished}
 * get a reconciler-initiated kill action.</li>
 * </ol>
 * The limit is checked before each task is examined: as soon as the number of collected actions reaches
 * {@code concurrencyLimit}, the actions collected so far are returned.
 */
public static List<ChangeAction> reconcilerInitiatedAllTasksKillInitiated(ReconciliationEngine<JobManagerReconcilerEvent> engine, JobServiceRuntime runtime, JobStore jobStore, String reasonCode, String reason, int concurrencyLimit, VersionSupplier versionSupplier, TitusRuntime titusRuntime) {
    List<ChangeAction> actions = new ArrayList<>();
    EntityHolder runningView = engine.getRunningView();

    Set<String> idsInRunningView = new HashSet<>();
    for (EntityHolder runningHolder : runningView.getChildren()) {
        idsInRunningView.add(runningHolder.<Task>getEntity().getId());
    }

    // Pass 1: Accepted tasks that never reached the running model are finished immediately.
    for (EntityHolder referenceHolder : engine.getReferenceView().getChildren()) {
        if (actions.size() >= concurrencyLimit) {
            return actions;
        }
        Task referenceTask = referenceHolder.getEntity();
        boolean accepted = referenceTask.getStatus().getState() == TaskState.Accepted;
        if (accepted && !idsInRunningView.contains(referenceTask.getId())) {
            actions.add(BasicTaskActions.updateTaskAndWriteItToStore(
                    referenceTask.getId(),
                    engine,
                    taskRef -> JobFunctions.changeTaskStatus(taskRef, TaskState.Finished, reasonCode, reason, titusRuntime.getClock()),
                    jobStore,
                    V3JobOperations.Trigger.Reconciler,
                    reason,
                    versionSupplier,
                    titusRuntime,
                    JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason(reason).build()
            ));
        }
    }

    // Pass 2: tasks in the running model that are not already being killed or finished.
    for (EntityHolder runningHolder : runningView.getChildren()) {
        if (actions.size() >= concurrencyLimit) {
            return actions;
        }
        Task runningTask = runningHolder.getEntity();
        TaskState currentState = runningTask.getStatus().getState();
        if (currentState == TaskState.KillInitiated || currentState == TaskState.Finished) {
            continue;
        }
        actions.add(reconcilerInitiatedTaskKillInitiated(engine, runningTask, runtime, jobStore, versionSupplier, reasonCode, reason, titusRuntime));
    }
    return actions;
}
Aggregations