Search in sources :

Example 6 with TaskRelocationStatus

use of com.netflix.titus.api.relocation.model.TaskRelocationStatus in project titus-control-plane by Netflix.

the class TaskEvictionStep method execute.

/**
 * Requests termination of every task in the given relocation plans and reports a
 * {@link TaskRelocationStatus} for each of them.
 * <p>
 * One {@code evictionServiceClient.terminateTask} call (bounded by {@code EVICTION_TIMEOUT}) is
 * created per plan. The calls are merged with {@code ReactorExt.merge} using {@code CONCURRENCY_LIMIT}
 * and {@code scheduler}, and this method blocks until the merged result is available.
 *
 * @param taskToEvict relocation plans keyed by task id
 * @return relocation statuses keyed by task id; if the merged call itself fails, every plan is
 * reported as a {@code Failure} with {@code STATUS_SYSTEM_ERROR}
 */
private Map<String, TaskRelocationStatus> execute(Map<String, TaskRelocationPlan> taskToEvict) {
    Map<String, Mono<Void>> actions = taskToEvict.values().stream().collect(Collectors.toMap(TaskRelocationPlan::getTaskId, p -> {
        String message;
        switch(p.getReason()) {
            case AgentEvacuation:
                message = String.format("Agent evacuation: %s", p.getReasonMessage());
                break;
            case SelfManagedMigration:
                message = String.format("Self managed migration requested on %s: %s", DateTimeExt.toUtcDateTimeString(p.getDecisionTime()), p.getReasonMessage());
                break;
            case TaskMigration:
                message = p.getReasonMessage();
                break;
            default:
                // Both values are format arguments. Concatenating the reason onto the pattern would leave
                // the second %s without an argument and make String.format throw.
                message = String.format("[unrecognized relocation reason %s]: %s", p.getReason(), p.getReasonMessage());
        }
        return evictionServiceClient.terminateTask(p.getTaskId(), message).timeout(EVICTION_TIMEOUT);
    }));
    Map<String, Optional<Throwable>> evictionResults;
    try {
        evictionResults = ReactorExt.merge(actions, CONCURRENCY_LIMIT, scheduler).block();
    } catch (Exception e) {
        logger.warn("Unexpected error when calling the eviction service", e);
        // Each status must carry its task id (the map below is keyed by it) and its plan,
        // the same way the per-task statuses further down do.
        return taskToEvict.values().stream().map(p -> TaskRelocationStatus.newBuilder().withTaskId(p.getTaskId()).withState(TaskRelocationState.Failure).withStatusCode(TaskRelocationStatus.STATUS_SYSTEM_ERROR).withStatusMessage("Unexpected error: " + ExceptionExt.toMessageChain(e)).withTaskRelocationPlan(p).withTimestamp(clock.wallTime()).build()).collect(Collectors.toMap(TaskRelocationStatus::getTaskId, s -> s));
    }
    Map<String, TaskRelocationStatus> results = new HashMap<>();
    taskToEvict.forEach((taskId, plan) -> {
        Optional<Throwable> evictionResult = evictionResults.get(plan.getTaskId());
        TaskRelocationStatus status;
        if (evictionResult != null) {
            if (!evictionResult.isPresent()) {
                // An empty Optional means the termination call finished without an error.
                status = TaskRelocationStatus.newBuilder().withTaskId(taskId).withState(TaskRelocationState.Success).withStatusCode(TaskRelocationStatus.STATUS_CODE_TERMINATED).withStatusMessage("Task terminated successfully").withTaskRelocationPlan(plan).withTimestamp(clock.wallTime()).build();
            } else {
                status = TaskRelocationStatus.newBuilder().withTaskId(taskId).withState(TaskRelocationState.Failure).withStatusCode(TaskRelocationStatus.STATUS_EVICTION_ERROR).withStatusMessage(evictionResult.get().getMessage()).withTaskRelocationPlan(plan).withTimestamp(clock.wallTime()).build();
            }
        } else {
            // This should never happen
            invariants.inconsistent("Eviction result missing: taskId=%s", plan.getTaskId());
            status = TaskRelocationStatus.newBuilder().withTaskId(taskId).withState(TaskRelocationState.Failure).withStatusCode(TaskRelocationStatus.STATUS_SYSTEM_ERROR).withStatusMessage("Eviction result missing").withTaskRelocationPlan(plan).withTimestamp(clock.wallTime()).build();
        }
        results.put(taskId, status);
        transactionLog.logTaskRelocationStatus(STEP_NAME, "eviction", status);
    });
    return results;
}
Also used : DateTimeExt(com.netflix.titus.common.util.DateTimeExt) Logger(org.slf4j.Logger) EvictionServiceClient(com.netflix.titus.runtime.connector.eviction.EvictionServiceClient) Stopwatch(com.google.common.base.Stopwatch) TaskRelocationState(com.netflix.titus.api.relocation.model.TaskRelocationStatus.TaskRelocationState) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Mono(reactor.core.publisher.Mono) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) Scheduler(reactor.core.scheduler.Scheduler) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) CodeInvariants(com.netflix.titus.common.util.code.CodeInvariants) Duration(java.time.Duration) Map(java.util.Map) Optional(java.util.Optional) ExceptionExt(com.netflix.titus.common.util.ExceptionExt) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus) Clock(com.netflix.titus.common.util.time.Clock) TaskRelocationPlan(com.netflix.titus.api.relocation.model.TaskRelocationPlan) Optional(java.util.Optional) HashMap(java.util.HashMap) Mono(reactor.core.publisher.Mono) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus)

Example 7 with TaskRelocationStatus

use of com.netflix.titus.api.relocation.model.TaskRelocationStatus in project titus-control-plane by Netflix.

the class TaskEvictionStep method evict.

/**
 * Runs the eviction step for the given plans and records timing metrics.
 * <p>
 * On success the number of returned statuses and the elapsed time are reported to
 * {@code metrics}. On failure the error is logged, the elapsed time is reported,
 * and the exception is rethrown to the caller.
 *
 * @param taskToEvict relocation plans keyed by task id
 * @return relocation statuses produced by {@code execute}
 */
public Map<String, TaskRelocationStatus> evict(Map<String, TaskRelocationPlan> taskToEvict) {
    final Stopwatch timer = Stopwatch.createStarted();
    try {
        final Map<String, TaskRelocationStatus> statuses = execute(taskToEvict);
        final long elapsedMs = timer.elapsed(TimeUnit.MILLISECONDS);
        metrics.onSuccess(statuses.size(), elapsedMs);
        logger.debug("Eviction result: {}", statuses);
        return statuses;
    } catch (Exception error) {
        logger.error("Step processing error", error);
        final long elapsedMs = timer.elapsed(TimeUnit.MILLISECONDS);
        metrics.onError(elapsedMs);
        throw error;
    }
}
Also used : Stopwatch(com.google.common.base.Stopwatch) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus)

Example 8 with TaskRelocationStatus

use of com.netflix.titus.api.relocation.model.TaskRelocationStatus in project titus-control-plane by Netflix.

the class ReactorTaskRelocationGrpcService method getLatestTaskRelocationResults.

/**
 * Returns the most recent eviction results held by {@code relocationWorkflowExecutor},
 * converted to their gRPC representation.
 * <p>
 * TODO Implement filtering. The {@code request} argument is currently not consulted.
 */
public Mono<TaskRelocationExecutions> getLatestTaskRelocationResults(TaskRelocationQuery request) {
    // Copy the values so the conversion works on its own list.
    List<TaskRelocationStatus> latestStatuses = new ArrayList<>(relocationWorkflowExecutor.getLastEvictionResults().values());
    return Mono.just(toGrpcTaskRelocationExecutions(latestStatuses));
}
Also used : ArrayList(java.util.ArrayList) RelocationGrpcModelConverters.toGrpcTaskRelocationExecutions(com.netflix.titus.runtime.relocation.endpoint.RelocationGrpcModelConverters.toGrpcTaskRelocationExecutions) TaskRelocationExecutions(com.netflix.titus.grpc.protogen.TaskRelocationExecutions) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus)

Example 9 with TaskRelocationStatus

use of com.netflix.titus.api.relocation.model.TaskRelocationStatus in project titus-control-plane by Netflix.

the class ReactorTaskRelocationGrpcService method getTaskRelocationResult.

/**
 * Looks up the relocation history of a single task.
 * <p>
 * The archived statuses from {@code archiveStore} are combined with the latest in-memory
 * eviction result (if any). The latest result is appended only when it is not already the
 * last archived entry. If neither source has data, the returned {@code Mono} fails with
 * {@code Status.NOT_FOUND}.
 *
 * @param request holder of the task id to look up
 * @return the combined history converted to its gRPC representation
 */
public Mono<TaskRelocationExecution> getTaskRelocationResult(RelocationTaskId request) {
    String taskId = request.getId();
    TaskRelocationStatus latest = relocationWorkflowExecutor.getLastEvictionResults().get(taskId);
    return archiveStore.getTaskRelocationStatusList(taskId).flatMap(archived -> {
        List<TaskRelocationStatus> history;
        if (latest != null) {
            if (archived.isEmpty()) {
                // Only the in-memory result is known.
                history = Collections.singletonList(latest);
            } else if (CollectionsExt.last(archived).equals(latest)) {
                // The latest result has already been archived; do not report it twice.
                history = archived;
            } else {
                history = CollectionsExt.copyAndAdd(archived, latest);
            }
        } else if (archived.isEmpty()) {
            return Mono.error(new StatusRuntimeException(Status.NOT_FOUND));
        } else {
            history = archived;
        }
        return Mono.just(RelocationGrpcModelConverters.toGrpcTaskRelocationExecution(history));
    });
}
Also used : StatusRuntimeException(io.grpc.StatusRuntimeException) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus)

Example 10 with TaskRelocationStatus

use of com.netflix.titus.api.relocation.model.TaskRelocationStatus in project titus-control-plane by Netflix.

the class TaskRelocationSpringResource method getTaskRelocationResult.

/**
 * REST endpoint returning the relocation history of a single task.
 * <p>
 * Blocks on {@code archiveStore} for the archived statuses and merges them with the latest
 * in-memory eviction result. The latest result is appended only when it differs from the
 * last archived entry.
 *
 * @param taskId id of the task, taken from the request path
 * @return the combined history converted to its gRPC representation
 * @throws WebApplicationException with a 404 response when no status is known for the task
 */
@RequestMapping(method = RequestMethod.GET, path = "/executions/{taskId}", produces = "application/json")
public TaskRelocationExecution getTaskRelocationResult(@PathVariable("taskId") String taskId) {
    TaskRelocationStatus latest = relocationWorkflowExecutor.getLastEvictionResults().get(taskId);
    List<TaskRelocationStatus> archived = archiveStore.getTaskRelocationStatusList(taskId).block();

    if (latest == null) {
        if (archived.isEmpty()) {
            throw new WebApplicationException(Response.status(Response.Status.NOT_FOUND).build());
        }
        return RelocationGrpcModelConverters.toGrpcTaskRelocationExecution(archived);
    }

    if (archived.isEmpty()) {
        // Only the in-memory result is known.
        return RelocationGrpcModelConverters.toGrpcTaskRelocationExecution(Collections.singletonList(latest));
    }

    List<TaskRelocationStatus> history;
    if (CollectionsExt.last(archived).equals(latest)) {
        // The latest result has already been archived; do not report it twice.
        history = archived;
    } else {
        history = CollectionsExt.copyAndAdd(archived, latest);
    }
    return RelocationGrpcModelConverters.toGrpcTaskRelocationExecution(history);
}
Also used : WebApplicationException(javax.ws.rs.WebApplicationException) TaskRelocationStatus(com.netflix.titus.api.relocation.model.TaskRelocationStatus) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Aggregations

TaskRelocationStatus (com.netflix.titus.api.relocation.model.TaskRelocationStatus)12 Test (org.junit.Test)5 AbstractTaskRelocationTest (com.netflix.titus.supplementary.relocation.AbstractTaskRelocationTest)4 TaskRelocationPlan (com.netflix.titus.api.relocation.model.TaskRelocationPlan)3 Stopwatch (com.google.common.base.Stopwatch)2 Optional (java.util.Optional)2 Task (com.netflix.titus.api.jobmanager.model.job.Task)1 BatchJobExt (com.netflix.titus.api.jobmanager.model.job.ext.BatchJobExt)1 TaskRelocationState (com.netflix.titus.api.relocation.model.TaskRelocationStatus.TaskRelocationState)1 TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime)1 DateTimeExt (com.netflix.titus.common.util.DateTimeExt)1 ExceptionExt (com.netflix.titus.common.util.ExceptionExt)1 CodeInvariants (com.netflix.titus.common.util.code.CodeInvariants)1 ReactorExt (com.netflix.titus.common.util.rx.ReactorExt)1 Clock (com.netflix.titus.common.util.time.Clock)1 TaskRelocationExecutions (com.netflix.titus.grpc.protogen.TaskRelocationExecutions)1 EvictionServiceClient (com.netflix.titus.runtime.connector.eviction.EvictionServiceClient)1 RelocationGrpcModelConverters.toGrpcTaskRelocationExecutions (com.netflix.titus.runtime.relocation.endpoint.RelocationGrpcModelConverters.toGrpcTaskRelocationExecutions)1 StatusRuntimeException (io.grpc.StatusRuntimeException)1 Duration (java.time.Duration)1