Search in sources :

Example 21 with CallMetadata

use of com.netflix.titus.api.model.callmetadata.CallMetadata in project titus-control-plane by Netflix.

the class DefaultAppAutoScalingCallbackService method getScalableTargetResourceInfo.

/**
 * Produces the scalable target description for the given job.
 * <p>
 * The number of started tasks is taken from the pagination total of a task query filtered by
 * job id and the "Started" task state; the job itself is then loaded to read its desired capacity.
 * The scaling status is reported as in-progress while the desired capacity differs from the
 * started task count, and as successful once they match.
 *
 * @param jobId        id of the job to describe; also used as resource name and dimension id
 * @param callMetadata caller information passed to both gateway calls
 * @return an observable emitting the resource info, or failing with
 *         {@link JobManagerException#notServiceJob} when the job descriptor has no service spec
 */
@Override
public Observable<ScalableTargetResourceInfo> getScalableTargetResourceInfo(String jobId, CallMetadata callMetadata) {
    // Only the pagination total is read from the result, hence the single-item page.
    Page countOnlyPage = Page.newBuilder().setPageSize(1).build();
    TaskQuery startedTasksQuery = TaskQuery.newBuilder()
            .putFilteringCriteria("jobIds", jobId)
            .putFilteringCriteria("taskStates", "Started")
            .setPage(countOnlyPage)
            .build();

    return jobServiceGateway.findTasks(startedTasksQuery, callMetadata)
            .map(queryResult -> queryResult.getPagination().getTotalItems())
            .flatMap(startedTotal -> jobServiceGateway.findJob(jobId, callMetadata).map(foundJob -> Pair.of(foundJob, startedTotal)))
            .flatMap(jobAndStarted -> {
                Job job = jobAndStarted.getLeft();
                if (!job.getJobDescriptor().hasService()) {
                    return Observable.error(JobManagerException.notServiceJob(jobId));
                }

                Integer startedTasks = jobAndStarted.getRight();
                ServiceJobSpec serviceSpec = job.getJobDescriptor().getService();

                ScalableTargetResourceInfo.Builder infoBuilder = ScalableTargetResourceInfo.newBuilder()
                        .actualCapacity(startedTasks)
                        .desiredCapacity(serviceSpec.getCapacity().getDesired())
                        .dimensionName(DIMENSION_NAME)
                        .resourceName(jobId)
                        .scalableTargetDimensionId(jobId)
                        .version(buildVersion(job));

                String scalingStatus = serviceSpec.getCapacity().getDesired() != startedTasks
                        ? ScalingStatus.InProgress.name()
                        : ScalingStatus.Successful.name();
                infoBuilder.scalingStatus(scalingStatus);

                return Observable.just(infoBuilder.build());
            });
}
Also used : Logger(org.slf4j.Logger) ServiceJobSpec(com.netflix.titus.grpc.protogen.ServiceJobSpec) UInt32Value(com.google.protobuf.UInt32Value) JobStatus(com.netflix.titus.grpc.protogen.JobStatus) LoggerFactory(org.slf4j.LoggerFactory) Job(com.netflix.titus.grpc.protogen.Job) Singleton(javax.inject.Singleton) ArrayList(java.util.ArrayList) Page(com.netflix.titus.grpc.protogen.Page) Observable(rx.Observable) Inject(javax.inject.Inject) List(java.util.List) Pair(com.netflix.titus.common.util.tuple.Pair) Optional(java.util.Optional) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) JobServiceGateway(com.netflix.titus.runtime.jobmanager.gateway.JobServiceGateway) JobCapacityWithOptionalAttributes(com.netflix.titus.grpc.protogen.JobCapacityWithOptionalAttributes) TaskQuery(com.netflix.titus.grpc.protogen.TaskQuery) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) JobCapacityUpdateWithOptionalAttributes(com.netflix.titus.grpc.protogen.JobCapacityUpdateWithOptionalAttributes) ServiceJobSpec(com.netflix.titus.grpc.protogen.ServiceJobSpec) TaskQuery(com.netflix.titus.grpc.protogen.TaskQuery) Job(com.netflix.titus.grpc.protogen.Job)

Example 22 with CallMetadata

use of com.netflix.titus.api.model.callmetadata.CallMetadata in project titus-control-plane by Netflix.

the class BatchDifferenceResolver method applyStore.

/**
 * Computes the change actions needed to bring the store model in line with the reference model
 * of a batch job.
 * <p>
 * Returns no actions when the store write interceptor's execution limits check fails for the
 * store job. Otherwise a job store update is scheduled if the job entities differ, and each
 * reference task is either written to the store (when out of sync) or, when in sync and eligible,
 * resubmitted as a new task.
 *
 * @param engine          reconciliation engine passed to the generated store actions
 * @param refJobView      view over the reference model of the job
 * @param storeJob        root holder of the store model
 * @param allowedNewTasks remaining budget of new tasks; decremented once per retry-eligible task
 * @return change actions to execute, possibly empty
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, BatchJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }

    List<ChangeAction> result = new ArrayList<>();

    EntityHolder referenceRoot = refJobView.getJobHolder();
    Job<BatchJobExt> referenceJob = referenceRoot.getEntity();
    boolean jobInSync = referenceRoot.getEntity().equals(storeJob.getEntity());
    if (!jobInSync) {
        result.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }

    boolean terminating = referenceJob.getStatus().getState() == JobState.KillInitiated;

    for (EntityHolder referenceTaskHolder : referenceRoot.getChildren()) {
        Optional<EntityHolder> storeTaskHolder = storeJob.findById(referenceTaskHolder.getId());
        boolean inSync = storeTaskHolder.isPresent() && DifferenceResolverUtils.areEquivalent(storeTaskHolder.get(), referenceTaskHolder);
        boolean retryEligible = !terminating
                && DifferenceResolverUtils.shouldRetry(referenceJob, referenceTaskHolder.getEntity())
                && allowedNewTasks.get() > 0;

        if (!inSync) {
            // Store copy is missing or stale: persist the reference version of the task.
            Task referenceTask = referenceTaskHolder.getEntity();
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            result.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, referenceTask.getId(), callMetadata, titusRuntime)));
        } else {
            BatchJobTask persistedTask = storeTaskHolder.get().getEntity();
            if (retryEligible && TaskRetryers.shouldRetryNow(referenceTaskHolder, clock)) {
                logger.info("Retrying task: oldTaskId={}, index={}", referenceTaskHolder.getId(), persistedTask.getIndex());
                createNewTaskAction(refJobView, persistedTask.getIndex(), Optional.of(referenceTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(result::add);
            }
        }

        // The budget is consumed by immediate and delayed retries alike.
        if (retryEligible) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return result;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) BatchJobExt(com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt) ArrayList(java.util.ArrayList) BatchJobTask(com.netflix.titus.api.jobmanager.model.job.BatchJobTask) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 23 with CallMetadata

use of com.netflix.titus.api.model.callmetadata.CallMetadata in project titus-control-plane by Netflix.

the class BasicTaskActions method writeReferenceTaskToStore.

/**
 * Creates a reconciler-triggered change action that persists the reference model's version of a task.
 * <p>
 * When executed, the action looks the task up in the engine's reference view by id. If it is not
 * there, a code invariant violation is reported and no model update is emitted. Otherwise the task
 * is written with {@link JobStore#updateTask(Task)} (which assumes that the task record was created
 * already), and after the write completes a store model update is emitted that adds the task as
 * a child of the store root.
 *
 * @param titusStore   store the task record is written to
 * @param engine       reconciliation engine whose reference view supplies the task
 * @param taskId       id of the task to persist
 * @param callMetadata caller information attached to the change action
 * @param titusRuntime used to report the missing-task invariant violation
 * @return the change action; nothing is executed until the action itself is applied
 */
public static TitusChangeAction writeReferenceTaskToStore(JobStore titusStore, ReconciliationEngine<JobManagerReconcilerEvent> engine, String taskId, CallMetadata callMetadata, TitusRuntime titusRuntime) {
    return TitusChangeAction.newAction("writeReferenceTaskToStore")
            .trigger(V3JobOperations.Trigger.Reconciler)
            .id(taskId)
            .summary("Persisting task to the store")
            .callMetadata(callMetadata)
            .changeWithModelUpdate(self -> {
                Optional<EntityHolder> referenceHolder = engine.getReferenceView().findById(taskId);
                if (referenceHolder.isPresent()) {
                    Task referenceTask = referenceHolder.get().getEntity();
                    return titusStore.updateTask(referenceTask).andThen(Observable.fromCallable(() -> {
                        // Mirror the persisted task into the store model once the write has completed.
                        TitusModelAction storeModelUpdate = TitusModelAction.newModelUpdate(self).taskUpdate(storeRoot -> {
                            EntityHolder persistedHolder = EntityHolder.newRoot(referenceTask.getId(), referenceTask);
                            return Pair.of(storeRoot.addChild(persistedHolder), persistedHolder);
                        });
                        return ModelActionHolder.store(storeModelUpdate);
                    }));
                }
                // Should never happen
                titusRuntime.getCodeInvariants().inconsistent("Reference task with id %s not found.", taskId);
                return Observable.empty();
            });
}
Also used : Trigger(com.netflix.titus.api.jobmanager.service.V3JobOperations.Trigger) DateTimeExt(com.netflix.titus.common.util.DateTimeExt) JobModel(com.netflix.titus.api.jobmanager.model.job.JobModel) JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Observable(rx.Observable) Pair(com.netflix.titus.common.util.tuple.Pair) JobManagerConfiguration(com.netflix.titus.master.jobmanager.service.JobManagerConfiguration) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) ExceptionExt(com.netflix.titus.common.util.ExceptionExt) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) TaskRetryers(com.netflix.titus.master.jobmanager.service.common.action.TaskRetryers) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) 
VersionSuppliers(com.netflix.titus.master.jobmanager.service.VersionSuppliers) TaskAttributes(com.netflix.titus.api.jobmanager.TaskAttributes) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder)

Example 24 with CallMetadata

use of com.netflix.titus.api.model.callmetadata.CallMetadata in project titus-control-plane by Netflix.

the class KillInitiatedActions method userInitiateTaskKillAction.

/**
 * Change a task to {@link TaskState#KillInitiated} state, store it, and send the kill command to the compute provider.
 * All models are updated when both operations complete.
 * This method is used for user initiated kill operations, so the store operation happens before response is sent back to the user.
 * <p>
 * If the task is already in the {@link TaskState#KillInitiated} or {@link TaskState#Finished} state, the action
 * emits an empty list of model updates and does nothing else.
 *
 * @param engine               reconciliation engine from which the task and (when shrinking) the job are read
 * @param executionContext     passed to {@code createKillAction} together with the task
 * @param jobStore             store to which the task in the kill initiated state is written
 * @param versionSupplier      used to assign the next version to the updated task, and by the shrink action
 * @param taskId               id of the task to kill
 * @param shrink               if true, a shrink model action is added for the reference model
 * @param preventMinSizeUpdate if true (and {@code shrink} is set), the action fails when the job's desired
 *                             capacity is already at or below its minimum
 * @param reasonCode           reason code recorded in the new task status
 * @param reason               reason message recorded in the new task status; also used as the action summary
 * @param titusRuntime         provides the clock for the status change and is passed to the task lookups
 * @param callMetadata         caller information attached to the change action
 * @return change action with the API trigger that performs the kill when executed
 */
public static ChangeAction userInitiateTaskKillAction(ReconciliationEngine<JobManagerReconcilerEvent> engine, JobServiceRuntime executionContext, JobStore jobStore, VersionSupplier versionSupplier, String taskId, boolean shrink, boolean preventMinSizeUpdate, String reasonCode, String reason, TitusRuntime titusRuntime, CallMetadata callMetadata) {
    return TitusChangeAction.newAction("userInitiateTaskKill").id(taskId).trigger(V3JobOperations.Trigger.API).summary(reason).callMetadata(callMetadata).changeWithModelUpdates(self -> JobEntityHolders.toTaskObservable(engine, taskId, titusRuntime).flatMap(task -> {
        TaskState taskState = task.getStatus().getState();
        // Nothing to do if the kill is already in progress or the task has finished.
        if (taskState == TaskState.KillInitiated || taskState == TaskState.Finished) {
            return Observable.just(Collections.<ModelActionHolder>emptyList());
        }
        if (shrink) {
            Job<ServiceJobExt> job = engine.getReferenceView().getEntity();
            Capacity capacity = job.getJobDescriptor().getExtensions().getCapacity();
            // Reject terminate-and-shrink when the desired size is already at or below the minimum.
            if (preventMinSizeUpdate && capacity.getDesired() <= capacity.getMin()) {
                return Observable.<List<ModelActionHolder>>error(JobManagerException.terminateAndShrinkNotAllowed(job, task));
            }
        }
        Task taskWithKillInitiated = VersionSuppliers.nextVersion(JobFunctions.changeTaskStatus(task, TaskState.KillInitiated, reasonCode, reason, titusRuntime.getClock()), versionSupplier);
        // Model updates are computed lazily, after the store write and the kill action complete. If expectTask
        // yields no task at that point, no model updates are emitted.
        Callable<List<ModelActionHolder>> modelUpdateActions = () -> JobEntityHolders.expectTask(engine, task.getId(), titusRuntime).map(current -> {
            List<ModelActionHolder> updateActions = new ArrayList<>();
            // The task state change is applied to all models.
            TitusModelAction stateUpdateAction = TitusModelAction.newModelUpdate(self).taskUpdate(taskWithKillInitiated);
            updateActions.addAll(ModelActionHolder.allModels(stateUpdateAction));
            if (shrink) {
                // The shrink action is applied to the reference model only.
                TitusModelAction shrinkAction = createShrinkAction(self, versionSupplier);
                updateActions.add(ModelActionHolder.reference(shrinkAction));
            }
            return updateActions;
        }).orElse(Collections.emptyList());
        // Order matters: persist the new task state, then run the kill action, then emit the model updates.
        return jobStore.updateTask(taskWithKillInitiated).andThen(createKillAction(executionContext, task)).andThen(Observable.fromCallable(modelUpdateActions));
    }));
}
Also used : Completable(rx.Completable) JobManagerConstants(com.netflix.titus.api.jobmanager.service.JobManagerConstants) JobServiceRuntime(com.netflix.titus.master.jobmanager.service.JobServiceRuntime) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) Callable(java.util.concurrent.Callable) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) ArrayList(java.util.ArrayList) Observable(rx.Observable) HashSet(java.util.HashSet) JobStatus(com.netflix.titus.api.jobmanager.model.job.JobStatus) JobState(com.netflix.titus.api.jobmanager.model.job.JobState) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) JobManagerException(com.netflix.titus.api.jobmanager.service.JobManagerException) JobEntityHolders(com.netflix.titus.master.jobmanager.service.common.action.JobEntityHolders) JobStore(com.netflix.titus.api.jobmanager.store.JobStore) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) Job(com.netflix.titus.api.jobmanager.model.job.Job) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) TaskStatus(com.netflix.titus.api.jobmanager.model.job.TaskStatus) Set(java.util.Set) JobFunctions(com.netflix.titus.api.jobmanager.model.job.JobFunctions) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder) List(java.util.List) V3JobOperations(com.netflix.titus.api.jobmanager.service.V3JobOperations) VersionSupplier(com.netflix.titus.master.jobmanager.service.VersionSupplier) ReconciliationEngine(com.netflix.titus.common.framework.reconciler.ReconciliationEngine) 
VersionSuppliers(com.netflix.titus.master.jobmanager.service.VersionSuppliers) Optional(java.util.Optional) JobManagerReconcilerEvent(com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) Collections(java.util.Collections) TitusModelAction(com.netflix.titus.master.jobmanager.service.common.action.TitusModelAction) Task(com.netflix.titus.api.jobmanager.model.job.Task) Capacity(com.netflix.titus.api.jobmanager.model.job.Capacity) ArrayList(java.util.ArrayList) Job(com.netflix.titus.api.jobmanager.model.job.Job) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState) Callable(java.util.concurrent.Callable) ModelActionHolder(com.netflix.titus.common.framework.reconciler.ModelActionHolder)

Example 25 with CallMetadata

use of com.netflix.titus.api.model.callmetadata.CallMetadata in project titus-control-plane by Netflix.

the class ServiceDifferenceResolver method applyStore.

/**
 * Computes the change actions needed to bring the store model in line with the reference model
 * of a service job.
 * <p>
 * Returns no actions when the store write interceptor's execution limits check fails for the
 * store job. Otherwise a job store update is scheduled if the job entities differ, and for each
 * reference task:
 * <ul>
 * <li>if the store has no equivalent copy of the task (missing or different), the reference task is
 * written to the store;</li>
 * <li>if the copies are equivalent and the task is finished, it is either removed (job terminating,
 * task scaled down, or enough tasks running) or replaced by a new task when a retry is due.</li>
 * </ul>
 *
 * @param engine          reconciliation engine passed to the generated store actions
 * @param refJobView      view over the reference model of the job
 * @param storeJob        root holder of the store model
 * @param allowedNewTasks remaining budget of new tasks; decremented once per retry-eligible task
 * @return change actions to execute, possibly empty
 */
private List<ChangeAction> applyStore(ReconciliationEngine<JobManagerReconcilerEvent> engine, ServiceJobView refJobView, EntityHolder storeJob, AtomicInteger allowedNewTasks) {
    if (!storeWriteRetryInterceptor.executionLimits(storeJob)) {
        return Collections.emptyList();
    }
    List<ChangeAction> actions = new ArrayList<>();
    EntityHolder refJobHolder = refJobView.getJobHolder();
    Job<ServiceJobExt> refJob = refJobHolder.getEntity();
    if (!refJobHolder.getEntity().equals(storeJob.getEntity())) {
        actions.add(storeWriteRetryInterceptor.apply(BasicJobActions.updateJobInStore(engine, jobStore)));
    }
    boolean isJobTerminating = refJob.getStatus().getState() == JobState.KillInitiated;
    for (EntityHolder referenceTaskHolder : refJobHolder.getChildren()) {
        ServiceJobTask refTask = referenceTaskHolder.getEntity();
        Optional<EntityHolder> storeHolder = storeJob.findById(referenceTaskHolder.getId());
        // A reference task without a store counterpart is treated as out of sync (same as the batch
        // resolver) instead of failing the whole pass with NoSuchElementException on Optional.get().
        boolean refAndStoreInSync = storeHolder.isPresent() && areEquivalent(storeHolder.get(), referenceTaskHolder);
        boolean shouldRetry = !isJobTerminating && refTask.getStatus().getState() == TaskState.Finished && !refTask.getStatus().getReasonCode().equals(TaskStatus.REASON_SCALED_DOWN) && allowedNewTasks.get() > 0;
        if (refAndStoreInSync) {
            // Safe: refAndStoreInSync implies storeHolder.isPresent().
            ServiceJobTask storeTask = storeHolder.get().getEntity();
            TaskState currentTaskState = refTask.getStatus().getState();
            if (currentTaskState == TaskState.Finished) {
                if (isJobTerminating || isScaledDown(storeTask) || hasEnoughTasksRunning(refJobView)) {
                    actions.add(removeFinishedServiceTaskAction(jobStore, storeTask));
                } else if (shouldRetry && TaskRetryers.shouldRetryNow(referenceTaskHolder, clock)) {
                    createNewTaskAction(refJobView, Optional.of(referenceTaskHolder), Collections.emptyList(), Collections.emptyList()).ifPresent(actions::add);
                }
            }
        } else {
            CallMetadata callMetadata = RECONCILER_CALLMETADATA.toBuilder().withCallReason("Writing runtime state changes to store").build();
            actions.add(storeWriteRetryInterceptor.apply(BasicTaskActions.writeReferenceTaskToStore(jobStore, engine, refTask.getId(), callMetadata, titusRuntime)));
        }
        // Both current and delayed retries are counted
        if (shouldRetry) {
            allowedNewTasks.decrementAndGet();
        }
    }
    return actions;
}
Also used : Task(com.netflix.titus.api.jobmanager.model.job.Task) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) TitusChangeAction(com.netflix.titus.master.jobmanager.service.common.action.TitusChangeAction) ChangeAction(com.netflix.titus.common.framework.reconciler.ChangeAction) CallMetadata(com.netflix.titus.api.model.callmetadata.CallMetadata) ServiceJobExt(com.netflix.titus.api.jobmanager.model.job.ext.ServiceJobExt) ArrayList(java.util.ArrayList) EntityHolder(com.netflix.titus.common.framework.reconciler.EntityHolder) ServiceJobTask(com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) TaskState(com.netflix.titus.api.jobmanager.model.job.TaskState)

Aggregations

CallMetadata (com.netflix.titus.api.model.callmetadata.CallMetadata) 35, List (java.util.List) 16, ArrayList (java.util.ArrayList) 14, Task (com.netflix.titus.api.jobmanager.model.job.Task) 13, TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime) 12, Completable (rx.Completable) 12, TaskState (com.netflix.titus.api.jobmanager.model.job.TaskState) 11, Collections (java.util.Collections) 11, Collectors (java.util.stream.Collectors) 11, JobManagerException (com.netflix.titus.api.jobmanager.service.JobManagerException) 10, Test (org.junit.Test) 10, TaskStatus (com.netflix.titus.api.jobmanager.model.job.TaskStatus) 9, V3JobOperations (com.netflix.titus.api.jobmanager.service.V3JobOperations) 9, CollectionsExt (com.netflix.titus.common.util.CollectionsExt) 9, Pair (com.netflix.titus.common.util.tuple.Pair) 9, Set (java.util.Set) 9, TimeUnit (java.util.concurrent.TimeUnit) 9, TaskAttributes (com.netflix.titus.api.jobmanager.TaskAttributes) 7, ServiceJobTask (com.netflix.titus.api.jobmanager.model.job.ServiceJobTask) 7, JobManagerEvent (com.netflix.titus.api.jobmanager.model.job.event.JobManagerEvent) 7