Search in sources :

Example 6 with EntityHolder

use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.

the class JobReconciliationFrameworkFactory method newRestoredEngine.

/**
 * Creates a reconciliation engine for a job and its tasks that are being restored.
 *
 * <p>A root holder is created for the job. Each task is wrapped in its own holder, passed through
 * {@code TaskTimeoutChangeActions.setTimeoutOnRestoreFromStore}, and the resulting holder is attached
 * to the job holder as a child.
 *
 * @param job   the job that becomes the root entity of the engine model
 * @param tasks the tasks to attach as children of the job holder
 * @return the engine produced by {@code newEngine(holder, false)}
 */
private InternalReconciliationEngine<JobManagerReconcilerEvent> newRestoredEngine(Job job, List<Task> tasks) {
    EntityHolder rootHolder = EntityHolder.newRoot(job.getId(), job);
    for (Task restoredTask : tasks) {
        EntityHolder withTimeout = TaskTimeoutChangeActions.setTimeoutOnRestoreFromStore(
                jobManagerConfiguration,
                EntityHolder.newRoot(restoredTask.getId(), restoredTask),
                clock
        );
        // addChild returns a holder, so the accumulated root has to be reassigned on every iteration.
        rootHolder = rootHolder.addChild(withTimeout);
    }
    return newEngine(rootHolder, false);
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 7 with EntityHolder

use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.

the class DefaultV3JobOperations method moveServiceTask.

/**
 * Moves a task from one service job to another.
 *
 * <p>All work is deferred until subscription. The following validations run in order, each failing with a
 * {@code JobManagerException}:
 * <ol>
 *     <li>an engine owning {@code taskId} exists,</li>
 *     <li>the owning job is a service job,</li>
 *     <li>the owning job id equals {@code sourceJobId},</li>
 *     <li>the owning job id differs from {@code targetJobId},</li>
 *     <li>an engine for {@code targetJobId} exists and its job is a service job,</li>
 *     <li>if move task validation is enabled, the two jobs are compatible.</li>
 * </ol>
 * When all checks pass, a {@code MoveTaskBetweenJobsAction} is submitted through
 * {@code reconciliationFramework.changeReferenceModel}.
 *
 * @param sourceJobId  id of the job expected to own the task
 * @param targetJobId  id of the job the task is moved to
 * @param taskId       id of the task being moved
 * @param callMetadata caller metadata passed to the created change actions
 * @return the observable returned by {@code changeReferenceModel}, wrapped in {@code Observable.defer}
 */
@Override
public Observable<Void> moveServiceTask(String sourceJobId, String targetJobId, String taskId, CallMetadata callMetadata) {
    return Observable.defer(() -> {
        // Resolve and validate the source side.
        Pair<ReconciliationEngine<JobManagerReconcilerEvent>, EntityHolder> sourceEngineAndTask = reconciliationFramework
                .findEngineByChildId(taskId)
                .orElseThrow(() -> JobManagerException.taskNotFound(taskId));
        ReconciliationEngine<JobManagerReconcilerEvent> engineFrom = sourceEngineAndTask.getLeft();
        Job<ServiceJobExt> jobFrom = engineFrom.getReferenceView().getEntity();

        boolean sourceIsServiceJob = JobFunctions.isServiceJob(jobFrom);
        if (!sourceIsServiceJob) {
            throw JobManagerException.notServiceJob(jobFrom.getId());
        }
        boolean ownedBySourceJob = jobFrom.getId().equals(sourceJobId);
        if (!ownedBySourceJob) {
            throw JobManagerException.taskJobMismatch(taskId, sourceJobId);
        }
        boolean movingToSameJob = jobFrom.getId().equals(targetJobId);
        if (movingToSameJob) {
            throw JobManagerException.sameJobs(jobFrom.getId());
        }

        // Resolve and validate the target side.
        ReconciliationEngine<JobManagerReconcilerEvent> engineTo = reconciliationFramework
                .findEngineByRootId(targetJobId)
                .orElseThrow(() -> JobManagerException.jobNotFound(targetJobId));
        Job<ServiceJobExt> jobTo = engineTo.getReferenceView().getEntity();
        if (!JobFunctions.isServiceJob(jobTo)) {
            throw JobManagerException.notServiceJob(jobTo.getId());
        }

        // The compatibility result is always computed, but only enforced when the feature flag is on.
        JobCompatibility compatibility = JobCompatibility.of(jobFrom, jobTo);
        if (featureActivationConfiguration.isMoveTaskValidationEnabled() && !compatibility.isCompatible()) {
            Optional<String> diffReport = ProtobufExt.diffReport(
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorFrom()),
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorTo())
            );
            throw JobManagerException.notCompatible(jobFrom, jobTo, diffReport.orElse(""));
        }

        MoveTaskBetweenJobsAction moveAction = new MoveTaskBetweenJobsAction(engineFrom, engineTo, taskId, store, callMetadata, versionSupplier);
        return reconciliationFramework.changeReferenceModel(
                moveAction,
                (rootId, modelUpdatesObservable) -> {
                    // The callback receives a root id; name and summary depend on whether it is the target job.
                    boolean forTargetJob = targetJobId.equals(rootId);
                    String name = forTargetJob ? "moveTask(to)" : "moveTask(from)";
                    String summary = forTargetJob
                            ? "Moving a task to this job from job " + jobFrom.getId()
                            : "Moving a task out of this job to job " + jobTo.getId();
                    return new TitusChangeAction(Trigger.API, rootId, null, name, summary, callMetadata) {

                        @Override
                        public Observable<List<ModelActionHolder>> apply() {
                            return modelUpdatesObservable;
                        }
                    };
                },
                jobFrom.getId(),
                jobTo.getId()
        );
    });
}
Also used : MoveTaskBetweenJobsAction(com.netflix.titus.master.jobmanager.service.service.action.MoveTaskBetweenJobsAction) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) JobCompatibility(com.netflix.titus.api.jobmanager.model.job.JobCompatibility) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) List(java.util.List) ArrayList(java.util.ArrayList)

Example 8 with EntityHolder

use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.

the class MoveTaskBetweenJobsAction method apply.

/**
 * Moves the task identified by {@code taskId} from the source job to the target job.
 *
 * <p>All work is deferred until subscription. Before anything is written, the following is checked
 * against the reference views of both engines:
 * <ul>
 *     <li>the task is a child of the source job,</li>
 *     <li>the source job is in the {@code Accepted} state and its desired capacity is above its minimum,</li>
 *     <li>the target job is in the {@code Accepted} state and its desired capacity is below its maximum.</li>
 * </ul>
 * Updated versions of both jobs and of the task are then computed and handed to {@code titusStore.moveTask}.
 * The per-job model update actions are created in the observable chained after the store call.
 *
 * @return an observable emitting a map keyed by job id (source and target) with the model update actions
 *         for that job
 * @throws JobManagerException (raised inside the deferred observable) when any of the validations fails
 */
@Override
public Observable<Map<String, List<ModelActionHolder>>> apply() {
    return Observable.defer(() -> {
        // Validate data
        Job<ServiceJobExt> jobFrom = engineFrom.getReferenceView().getEntity();
        Job<ServiceJobExt> jobTo = engineTo.getReferenceView().getEntity();
        EntityHolder taskFromReferenceHolder = engineFrom.getReferenceView().findChildById(taskId).orElseThrow(() -> JobManagerException.taskJobMismatch(taskId, jobFrom.getId()));
        if (jobFrom.getStatus().getState() != JobState.Accepted) {
            // Report the source job here: it is the one whose state was just found to be invalid.
            throw JobManagerException.unexpectedJobState(jobFrom, JobState.Accepted);
        }
        Capacity capacityFrom = jobFrom.getJobDescriptor().getExtensions().getCapacity();
        // Removing a task shrinks the source job by one, which must not take it below its minimum.
        if (capacityFrom.getMin() >= capacityFrom.getDesired()) {
            throw JobManagerException.belowMinCapacity(jobFrom, 1);
        }
        if (jobTo.getStatus().getState() != JobState.Accepted) {
            throw JobManagerException.unexpectedJobState(jobTo, JobState.Accepted);
        }
        Capacity capacityTo = jobTo.getJobDescriptor().getExtensions().getCapacity();
        // Adding a task grows the target job by one, which must not take it above its maximum.
        if (capacityTo.getDesired() >= capacityTo.getMax()) {
            throw JobManagerException.aboveMaxCapacity(jobTo, 1);
        }
        Task taskFromReference = taskFromReferenceHolder.getEntity();
        // The running view may not contain the task, hence the Optional.
        Optional<EntityHolder> taskFromRunningHolder = engineFrom.getRunningView().findChildById(taskId);
        // Compute new model entities
        // Decrement the source job size by 1
        Job<ServiceJobExt> updatedJobFrom = nextVersion(JobFunctions.incrementJobSize(jobFrom, -1), versionSupplier);
        // Increment the target job size by 1
        Job<ServiceJobExt> updatedJobTo = nextVersion(JobFunctions.incrementJobSize(jobTo, 1), versionSupplier);
        Task updatedReferenceTaskTo = VersionSuppliers.nextVersion(JobFunctions.moveTask(jobFrom.getId(), jobTo.getId(), taskFromReference), versionSupplier);
        // Move the task
        return titusStore.moveTask(updatedJobFrom, updatedJobTo, updatedReferenceTaskTo).andThen(Observable.fromCallable(() -> ImmutableMap.of(jobFrom.getId(), createModelUpdateActionsFrom(updatedJobFrom, updatedJobTo, taskFromReference, callMetadata), jobTo.getId(), createModelUpdateActionsTo(updatedJobFrom, updatedJobTo, updatedReferenceTaskTo, taskFromRunningHolder, callMetadata))));
    });
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 9 with EntityHolder

use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.

the class BatchDifferenceResolver method createNewTaskAction.

/**
 * Builds the change action that creates (or replaces) a batch task at the given index.
 *
 * <p>As a safety check, the children of the reference job holder whose task state satisfies
 * {@code TaskState.isRunning} are counted first. If that count already reaches the job's required size,
 * an invariant violation is reported and no action is produced.
 *
 * <p>Otherwise the task context obtained from {@code getTaskContext} is extended with the resource pool
 * and tier of the job's capacity group, and the create-or-replace action is wrapped with the store write
 * retry interceptor.
 *
 * @param refJobView              reference view of the batch job
 * @param taskIndex               index of the task to create
 * @param previousTask            holder of the task being replaced, if any; forwarded to {@code getTaskContext}
 * @param unassignedIpAllocations forwarded to {@code getTaskContext}
 * @param ebsVolumeIds            forwarded to {@code getTaskContext}
 * @return the store action, or an empty optional when the safety check fails
 */
private Optional<TitusChangeAction> createNewTaskAction(BatchJobView refJobView, int taskIndex, Optional<EntityHolder> previousTask, List<String> unassignedIpAllocations, List<String> ebsVolumeIds) {
    // Safety check
    long activeTaskCount = refJobView.getJobHolder().getChildren().stream()
            .map(child -> (Task) child.getEntity())
            .map(task -> task.getStatus().getState())
            .filter(state -> TaskState.isRunning(state))
            .count();
    if (activeTaskCount >= refJobView.getRequiredSize()) {
        titusRuntime.getCodeInvariants().inconsistent("Batch job reconciler attempts to create too many tasks: jobId=%s, requiredSize=%s, current=%s", refJobView.getJob().getId(), refJobView.getRequiredSize(), activeTaskCount);
        return Optional.empty();
    }

    Map<String, String> baseContext = getTaskContext(previousTask, unassignedIpAllocations, ebsVolumeIds);

    JobDescriptor jobDescriptor = refJobView.getJob().getJobDescriptor();
    ApplicationSLA capacityGroup = JobManagerUtil.getCapacityGroupDescriptor(jobDescriptor, capacityGroupService);
    String resourcePool = capacityGroup.getResourcePool();

    // Extend the context with capacity group details before handing it to the task creation action.
    Map<String, String> enrichedContext = CollectionsExt.copyAndAdd(
            baseContext,
            ImmutableMap.of(
                    TaskAttributes.TASK_ATTRIBUTES_RESOURCE_POOL, resourcePool,
                    TaskAttributes.TASK_ATTRIBUTES_TIER, capacityGroup.getTier().name()
            )
    );

    return Optional.of(storeWriteRetryInterceptor.apply(
            createOrReplaceTaskAction(runtime, jobStore, refJobView.getJobHolder(), taskIndex, versionSupplier, clock, enrichedContext)
    ));
}
Also used : JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) RetryActionInterceptor(com.netflix.titus.master.jobmanager.service.common.interceptor.RetryActionInterceptor) RECONCILER_CALLMETADATA(com.netflix.titus.api.jobmanager.service.JobManagerConstants.RECONCILER_CALLMETADATA) FeatureActivationConfiguration(com.netflix.titus.api.FeatureActivationConfiguration) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) BasicJobActions(com.netflix.titus.master.jobmanager.service.common.action.task.BasicJobActions) JobManagerConfiguration(com.netflix.titus.master.jobmanager.service.JobManagerConfiguration) Schedulers(rx.schedulers.Schedulers) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) JobManagerUtil(com.netflix.titus.master.jobmanager.service.JobManagerUtil) TaskRetryers(com.netflix.titus.master.jobmanager.service.common.action.TaskRetryers) Job(com.netflix.titus.api.jobmanager.model.job.Job) ImmutableMap(com.google.common.collect.ImmutableMap) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) Set(java.util.Set) Scheduler(rx.Scheduler) DifferenceResolverUtils.getUnassignedIpAllocations(com.netflix.titus.master.jobmanager.service.common.DifferenceResolverUtils.getUnassignedIpAllocations) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) List(java.util.List) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) Optional(java.util.Optional) 
JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) Clock(com.netflix.titus.common.util.time.Clock) KillInitiatedActions(com.netflix.titus.master.jobmanager.service.common.action.task.KillInitiatedActions) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) ApplicationSlaManagementService(com.netflix.titus.master.service.management.ApplicationSlaManagementService) CreateOrReplaceBatchTaskActions.createOrReplaceTaskAction(com.netflix.titus.master.jobmanager.service.batch.action.CreateOrReplaceBatchTaskActions.createOrReplaceTaskAction) DifferenceResolverUtils(com.netflix.titus.master.jobmanager.service.common.DifferenceResolverUtils) Singleton(javax.inject.Singleton) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Inject(javax.inject.Inject) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ApplicationSLA(com.netflix.titus.api.model.ApplicationSLA) DifferenceResolverUtils.getUnassignedEbsVolumes(com.netflix.titus.master.jobmanager.service.common.DifferenceResolverUtils.getUnassignedEbsVolumes) Named(javax.inject.Named) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) Logger(org.slf4j.Logger) DifferenceResolverUtils.getTaskContext(com.netflix.titus.master.jobmanager.service.common.DifferenceResolverUtils.getTaskContext) Retryers(com.netflix.titus.common.util.retry.Retryers) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) TimeUnit(java.util.concurrent.TimeUnit) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) BasicTaskActions(com.netflix.titus.master.jobmanager.service.common.action.task.BasicTaskActions) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) TokenBucket(com.netflix.titus.common.util.limiter.tokenbucket.TokenBucket) Collections(java.util.Collections) Task(com.netflix.titus.api.jobmanager.model.job.Task) 
BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) JobDescriptor(com.netflix.titus.api.jobmanager.model.job.JobDescriptor) ApplicationSLA(com.netflix.titus.api.model.ApplicationSLA) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction)

Example 10 with EntityHolder

use of com.netflix.titus.common.framework.reconciler.EntityHolder in project titus-control-plane by Netflix.

the class BatchDifferenceResolver method applyRuntime.

/**
 * Computes the change actions for a batch job based on the state of its reference model.
 *
 * <ul>
 *     <li>Job in {@code KillInitiated} state: returns the task kill actions, or, when there are none,
 *     the task state timeout actions.</li>
 *     <li>Job in {@code Finished} state: returns an empty list.</li>
 *     <li>Otherwise: returns the job size adjusting actions, followed by the missing running task actions
 *     (only looked up when no size adjustment is needed), followed by the task state timeout actions.</li>
 * </ul>
 *
 * @param engine          reconciliation engine of the job
 * @param refJobView      reference view of the job
 * @param runningModel    running model holder, wrapped into a {@code BatchJobView}
 * @param storeModel      store model holder, passed to {@code findJobSizeInconsistencies}
 * @param allowedNewTasks counter passed to {@code findJobSizeInconsistencies}
 * @return the list of change actions to execute
 */
private List<ChangeAction> applyRuntime(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder runningModel, EntityHolder storeModel, AtomicInteger allowedNewTasks) {
    EntityHolder referenceModel = refJobView.getJobHolder();
    BatchJobView runningJobView = new BatchJobView(runningModel);

    if (DifferenceResolverUtils.hasJobState(referenceModel, JobState.KillInitiated)) {
        List<ChangeAction> killActions = KillInitiatedActions.reconcilerInitiatedAllTasksKillInitiated(engine, runtime, jobStore, TaskStatus.REASON_TASK_KILLED, "Killing task as its job is in KillInitiated state", configuration.getConcurrentReconcilerStoreUpdateLimit(), versionSupplier, titusRuntime);
        if (!killActions.isEmpty()) {
            return killActions;
        }
        // Nothing to kill in this round, so only look for tasks that timed out in their current state.
        return DifferenceResolverUtils.findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime);
    }

    if (DifferenceResolverUtils.hasJobState(referenceModel, JobState.Finished)) {
        return Collections.emptyList();
    }

    List<ChangeAction> sizeAdjustingActions = findJobSizeInconsistencies(refJobView, storeModel, allowedNewTasks);
    List<ChangeAction> result = new ArrayList<>(sizeAdjustingActions);
    // Missing running tasks are only looked up when the job size itself needs no adjustment.
    if (sizeAdjustingActions.isEmpty()) {
        result.addAll(findMissingRunningTasks(engine, refJobView, runningJobView));
    }
    result.addAll(DifferenceResolverUtils.findTaskStateTimeouts(engine, runningJobView, configuration, runtime, jobStore, versionSupplier, stuckInStateRateLimiter, titusRuntime));
    return result;
}
Also used : TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Aggregations

EntityHolder (com.netflix.titus.common.framework.reconciler.EntityHolder)31 ArrayList (java.util.ArrayList)17 Task (com.netflix.titus.api.jobmanager.model.job.Task)12 ChangeAction (com.netflix.titus.common.framework.reconciler.ChangeAction)12 ModelActionHolder (com.netflix.titus.common.framework.reconciler.ModelActionHolder)12 TitusChangeAction (com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction)12 List (java.util.List)10 TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState)7 TitusModelAction (com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction)7 Job (com.netflix.titus.api.jobmanager.model.job.Job)6 TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus)6 ServiceJobExt (com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt)6 ReconciliationEngine (com.netflix.titus.common.framework.reconciler.ReconciliationEngine)6 JobManagerReconcilerEvent (com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent)6 Test (org.junit.Test)6 JobFunctions (com.netflix.titus.api.jobmanager.model.job.JobFunctions)5 JobStore (com.netflix.titus.api.jobmanager.store.JobStore)5 CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata)5 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)5 Optional (java.util.Optional)5