Search in sources:

Example 6 with ChangeAction

use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.

Class BatchDifferenceResolver, method applyRuntime.

/**
 * Computes the change actions derived from comparing the reference job view with the running model.
 *
 * <ul>
 *   <li>Job in {@code KillInitiated} state: returns the reconciler-initiated task kill actions, or, when
 *       there are none, the task state timeout actions.</li>
 *   <li>Job in {@code Finished} state: returns an empty list.</li>
 *   <li>Otherwise: returns job size adjusting actions, followed by missing running task actions (only when
 *       no size adjustment was produced), followed by task state timeout actions.</li>
 * </ul>
 *
 * @param engine          reconciliation engine of the job
 * @param refJobView      view over the reference model of the job
 * @param runningModel    running model of the job
 * @param storeModel      store model of the job
 * @param allowedNewTasks counter passed on to the job size inconsistency check
 * @return the change actions to execute (possibly empty)
 */
private List<ChangeAction> applyRuntime(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder runningModel, EntityHolder storeModel, AtomicInteger allowedNewTasks) {
    EntityHolder referenceModel = refJobView.getJobHolder();
    BatchJobView runningJobView = new BatchJobView(runningModel);

    if (DifferenceResolverUtils.hasJobState(referenceModel, JobState.KillInitiated)) {
        List<ChangeAction> killActions = KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated(engine, runtime, jobStore, TaskStatus.REASON_TASK_KILLED, "Killing task as its job is in KillInitiated state", configuration.getConcurrentReconcilerStoreUpdateLimit(), versionSupplier, titusRuntime);
        if (!killActions.isEmpty()) {
            return killActions;
        }
        // Nothing left to kill in this round, so only look for tasks stuck in a state.
        return DifferenceResolverUtils.findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime);
    }
    if (DifferenceResolverUtils.hasJobState(referenceModel, JobState.Finished)) {
        return Collections.emptyList();
    }

    List<ChangeAction> sizeAdjustments = findJobSizeInconsistencies(refJobView, storeModel, allowedNewTasks);
    List<ChangeAction> actions = new ArrayList<>(sizeAdjustments);
    // Missing running tasks are only examined when the job size required no adjustment.
    if (sizeAdjustments.isEmpty()) {
        actions.addAll(findMissingRunningTasks(engine, refJobView, runningJobView));
    }
    actions.addAll(DifferenceResolverUtils.findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime));
    return actions;
}
Also used : TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 7 with ChangeAction

use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.

Class ServiceDifferenceResolver, method applyRuntime.

/**
 * Computes the change actions derived from comparing the reference job view with the running model.
 *
 * <ul>
 *   <li>Job in {@code KillInitiated} state: requests task kill actions, using the current value of
 *       {@code allowedTaskKills} as the concurrency limit. If any are produced, the counter is reduced
 *       by their number and they are returned; otherwise the task state timeout actions are returned.</li>
 *   <li>Job in {@code Finished} state: returns an empty list.</li>
 *   <li>Otherwise: returns job size adjusting actions, followed by missing running task actions (only when
 *       no size adjustment was produced), followed by task state timeout actions.</li>
 * </ul>
 *
 * @param engine           reconciliation engine of the job
 * @param refJobView       view over the reference model of the job
 * @param runningModel     running model of the job
 * @param storeModel       store model of the job
 * @param allowedNewTasks  counter passed on to the job size inconsistency check
 * @param allowedTaskKills remaining task kill budget; decremented by the number of kill actions returned
 * @return the change actions to execute (possibly empty)
 */
private List<ChangeAction> applyRuntime(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, EntityHolder runningModel, EntityHolder storeModel, AtomicInteger allowedNewTasks, AtomicInteger allowedTaskKills) {
    EntityHolder referenceModel = refJobView.getJobHolder();
    ServiceJobView runningJobView = new ServiceJobView(runningModel);
    if (hasJobState(referenceModel, JobState.KillInitiated)) {
        List<ChangeAction> killInitiatedActions = KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated(engine, runtime, jobStore, TaskStatus.REASON_TASK_KILLED, "Killing task as its job is in KillInitiated state", allowedTaskKills.get(), versionSupplier, titusRuntime);
        if (killInitiatedActions.isEmpty()) {
            return findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime);
        }
        // Consume the kill budget with a single atomic update instead of a separate get() and set().
        allowedTaskKills.addAndGet(-killInitiatedActions.size());
        return killInitiatedActions;
    }
    if (hasJobState(referenceModel, JobState.Finished)) {
        return Collections.emptyList();
    }
    List<ChangeAction> actions = new ArrayList<>();
    List<ChangeAction> numberOfTaskAdjustingActions = findJobSizeInconsistencies(engine, refJobView, storeModel, allowedNewTasks, allowedTaskKills);
    actions.addAll(numberOfTaskAdjustingActions);
    // Missing running tasks are only examined when the job size required no adjustment.
    if (numberOfTaskAdjustingActions.isEmpty()) {
        actions.addAll(findMissingRunningTasks(engine, refJobView, runningJobView));
    }
    actions.addAll(findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime));
    return actions;
}
Also used : TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 8 with ChangeAction

use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.

Class BatchDifferenceResolver, method apply.

/**
 * Produces the change actions for one reconciliation pass over a batch job.
 *
 * <p>The number of new tasks that may be created is the configured active-not-started task limit minus
 * the current count of active, not yet started tasks (never below zero). Store-related actions are
 * collected first, then runtime-related ones; only when both are empty is completed job removal considered.
 *
 * @param engine reconciliation engine of the job
 * @return the change actions to execute (possibly empty)
 */
@Override
public List<ChangeAction> apply(ReconciliationEngine<JobManagerReconcilerEvent> engine) {
    BatchJobView refJobView = new BatchJobView(engine.getReferenceView());
    EntityHolder storeModel = engine.getStoreView();

    int notStartedCount = DifferenceResolverUtils.countActiveNotStartedTasks(refJobView.getJobHolder(), engine.getRunningView());
    int newTaskBudget = Math.max(0, configuration.getActiveNotStartedTasksLimit() - notStartedCount);
    AtomicInteger allowedNewTasks = new AtomicInteger(newTaskBudget);

    List<ChangeAction> actions = new ArrayList<>(applyStore(engine, refJobView, storeModel, allowedNewTasks));
    actions.addAll(applyRuntime(engine, refJobView, engine.getRunningView(), storeModel, allowedNewTasks));
    if (!actions.isEmpty()) {
        return actions;
    }

    // No store or runtime work pending: check whether the job itself can be removed.
    actions.addAll(removeCompletedJob(refJobView, engine.getReferenceView(), storeModel, jobStore, versionSupplier));
    return actions;
}
Also used : TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 9 with ChangeAction

use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.

Class BatchDifferenceResolver, method applyStore.

/**
 * Computes the change actions derived from comparing the reference model with the store model.
 *
 * <p>Returns an empty list when {@code storeWriteRetryInterceptor.executionLimits(storeJob)} is false.
 * Otherwise:
 * <ul>
 *   <li>a job store update is added when the reference job entity differs from the stored one;</li>
 *   <li>for each reference task that is missing from, or not equivalent to, its store counterpart,
 *       an action writing the reference task to the store is added;</li>
 *   <li>for each reference task that is in sync with the store, is eligible for retry, and whose
 *       retry is due now, a new task action is added (if one is created).</li>
 * </ul>
 * {@code allowedNewTasks} is decremented for every retry-eligible task, whether or not its retry
 * is due in this pass.
 *
 * @param engine          reconciliation engine of the job
 * @param refJobView      view over the reference model of the job
 * @param storeJob        store model of the job
 * @param allowedNewTasks remaining new task budget; tasks are retry-eligible only while it is positive
 * @return the change actions to execute (possibly empty)
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }

    EntityHolder refJobHolder = refJobView.getJobHolder();
    Job<BatchJobExt> refJob = refJobHolder.getEntity();
    List<ChangeAction> actions = new ArrayList<>();

    boolean jobInSync = refJobHolder.getEntity().equals(storeJob.getEntity());
    if (!jobInSync) {
        actions.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }

    boolean jobNotTerminating = refJob.getStatus().getState() != JobState.KillInitiated;
    for (EntityHolder refTaskHolder : refJobHolder.getChildren()) {
        Optional<EntityHolder> storeTaskHolder = storeJob.findById(refTaskHolder.getId());
        boolean taskInSync = storeTaskHolder.isPresent() && DifferenceResolverUtils.areEquivalent(storeTaskHolder.get(), refTaskHolder);
        boolean retryEligible = jobNotTerminating && DifferenceResolverUtils.shouldRetry(refJob, refTaskHolder.getEntity()) && allowedNewTasks.get() > 0;

        if (!taskInSync) {
            // The store is behind the reference model: persist the reference task first.
            Task task = refTaskHolder.getEntity();
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            actions.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, task.getId(), callMetadata, titusRuntime)));
        } else {
            BatchJobTask storeTask = storeTaskHolder.get().getEntity();
            if (retryEligible && TaskRetryers.shouldRetryNow(refTaskHolder, clock)) {
                logger.info("Retrying task: oldTaskId={}, index={}", refTaskHolder.getId(), storeTask.getIndex());
                createNewTaskAction(refJobView, storeTask.getIndex(), Optional.of(refTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(actions::add);
            }
        }

        // Retries due now and retries still delayed both consume the new task budget.
        if (retryEligible) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return actions;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ArrayList(java.util.ArrayList) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 10 with ChangeAction

use of com.netflix.titus.common.framework.reconciler.ChangeAction in project titus-control-plane by Netflix.

Class KillInitiatedActions, method reconcilerInitiatedAllTasksKillInitiated.

/**
 * Builds the change actions that terminate the tasks of a job during internal state reconciliation.
 *
 * <p>Two passes are made, and the result is returned as soon as it holds {@code concurrencyLimit} actions:
 * <ol>
 *   <li>Tasks in the reference view that are in the {@link TaskState#Accepted} state and are absent from
 *       the running view get an action that changes their status to {@link TaskState#Finished} and
 *       writes them to the store.</li>
 *   <li>Tasks in the running view that are neither {@link TaskState#KillInitiated} nor
 *       {@link TaskState#Finished} get the action produced by {@code reconcilerInitiatedTaskKillInitiated}.</li>
 * </ol>
 *
 * @param engine           reconciliation engine of the job
 * @param runtime          job service runtime passed to the task kill action
 * @param jobStore         job store used by the created actions
 * @param reasonCode       reason code recorded in the task status
 * @param reason           human readable reason, also used as the call reason
 * @param concurrencyLimit maximum number of actions to return
 * @param versionSupplier  version supplier passed to the created actions
 * @param titusRuntime     Titus runtime, source of the clock used for the status change
 * @return at most {@code concurrencyLimit} change actions (possibly empty)
 */
public static List<ChangeAction> reconcilerInitiatedAllTasksKillInitiated(ReconciliationEngine<JobManagerReconcilerEvent> engine, JobServiceRuntime runtime, JobStore jobStore, String reasonCode, String reason, int concurrencyLimit, VersionSupplier versionSupplier, TitusRuntime titusRuntime) {
    List<ChangeAction> result = new ArrayList<>();
    EntityHolder runningView = engine.getRunningView();

    Set<String> runningTaskIds = new HashSet<>();
    for (EntityHolder runningHolder : runningView.getChildren()) {
        Task runningTask = runningHolder.getEntity();
        runningTaskIds.add(runningTask.getId());
    }

    // Pass 1: immediately finish Accepted tasks, which are not yet in the running model.
    for (EntityHolder referenceHolder : engine.getReferenceView().getChildren()) {
        if (result.size() >= concurrencyLimit) {
            return result;
        }
        Task task = referenceHolder.getEntity();
        TaskState state = task.getStatus().getState();
        if (state != TaskState.Accepted || runningTaskIds.contains(task.getId())) {
            continue;
        }
        result.add(BasicTaskActions.updateTaskAndWriteItToStore(task.getId(), engine, taskRef -> JobFunctions.changeTaskStatus(taskRef, TaskState.Finished, reasonCode, reason, titusRuntime.getClock()), jobStore, V3JobOperations.Trigger.Reconciler, reason, versionSupplier, titusRuntime, JobManagerConstants.RECONCILER_CALLMETADATA.toBuilder().withCallReason(reason).build()));
    }

    // Pass 2: move running tasks to the KillInitiated state.
    for (EntityHolder runningHolder : runningView.getChildren()) {
        if (result.size() >= concurrencyLimit) {
            return result;
        }
        Task task = runningHolder.getEntity();
        TaskState state = task.getStatus().getState();
        if (state == TaskState.KillInitiated || state == TaskState.Finished) {
            continue;
        }
        result.add(reconcilerInitiatedTaskKillInitiated(engine, task, runtime, jobStore, versionSupplier, reasonCode, reason, titusRuntime));
    }
    return result;
}
Also used : Completable(rx.Completable) JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) Callable(java.util.concurrent.Callable) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) ArrayList(java.util.ArrayList) Observable(rx.Observable) HashSet(java.util.HashSet) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) Job(com.netflix.titus.api.jobmanager.model.job.Job) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) 
VersionSuppliers(com.netflix.titus.master.jobmanager.service.VersionSuppliers) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) Task(com.netflix.titus.api.jobmanager.model.job.Task) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) HashSet(java.util.HashSet)

Aggregations

ChangeAction (com.netflix.titus.common.framework.reconciler.ChangeAction)17 ArrayList (java.util.ArrayList)13 EntityHolder (com.netflix.titus.common.framework.reconciler.EntityHolder)12 TitusChangeAction (com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction)10 List (java.util.List)7 Task (com.netflix.titus.api.jobmanager.model.job.Task)5 ModelActionHolder (com.netflix.titus.common.framework.reconciler.ModelActionHolder)5 MultiEngineChangeAction (com.netflix.titus.common.framework.reconciler.MultiEngineChangeAction)5 Observable (rx.Observable)5 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)4 ServiceJobExt (com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt)4 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)4 ReconciliationEngine (com.netflix.titus.common.framework.reconciler.ReconciliationEngine)4 HashSet (java.util.HashSet)4 Optional (java.util.Optional)4 Set (java.util.Set)4 Job (com.netflix.titus.api.jobmanager.model.job.Job)3 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)3 JobState (com.netflix.titus.api.jobmanager.model.job.JobState)3 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)3