Search in sources :

Example 1 with DeployHealth

use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.

In the class SingularityDeployChecker, the method checkDeployProgress:

/**
 * Evaluates a pending deploy and decides its next {@link SingularityDeployResult}.
 *
 * <p>The checks run in a fixed order and the first one that applies wins:
 * <ol>
 *   <li>a cancel request produces a {@code CANCELED} result;</li>
 *   <li>a completed step is handed to {@code checkCanMoveToNextDeployStep};</li>
 *   <li>fewer active deploy tasks than the step's target goes to the overdue check;</li>
 *   <li>load balancer state is processed, cancelled, or waited on when applicable;</li>
 *   <li>otherwise the result depends on the {@link DeployHealth} of the deploy's active tasks.</li>
 * </ol>
 *
 * @param request the request the deploy belongs to
 * @param cancelRequest marker for a cancel request, if one exists
 * @param pendingDeploy the pending deploy being checked; its deploy progress must be present
 * @param updatePendingDeployRequest a requested update of the pending deploy, if any
 * @param deploy the deploy definition, if available
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @return the result describing the state the deploy should move to
 */
private SingularityDeployResult checkDeployProgress(final SingularityRequest request, final Optional<SingularityDeployMarker> cancelRequest, final SingularityPendingDeploy pendingDeploy, final Optional<SingularityUpdatePendingDeployRequest> updatePendingDeployRequest, final Optional<SingularityDeploy> deploy, final Collection<SingularityTaskId> deployActiveTasks, final Collection<SingularityTaskId> otherActiveTasks) {
    final SingularityDeployProgress progress = pendingDeploy.getDeployProgress().get();

    // A cancel request takes precedence over every other check.
    if (cancelRequest.isPresent()) {
        final SingularityDeployMarker cancelMarker = cancelRequest.get();
        LOG.info("Canceling a deploy {} due to cancel request {}", pendingDeploy, cancelMarker);
        String canceledBy = "";
        if (cancelMarker.getUser().isPresent()) {
            canceledBy = String.format(" by %s", cancelMarker.getUser().get());
        }
        final String cancelMessage = String.format("Canceled due to request%s at %s", canceledBy, cancelMarker.getTimestamp());
        return new SingularityDeployResult(DeployState.CANCELED, Optional.of(cancelMessage), pendingDeploy.getLastLoadBalancerUpdate(), Collections.<SingularityDeployFailure>emptyList(), System.currentTimeMillis());
    }

    if (progress.isStepComplete()) {
        return checkCanMoveToNextDeployStep(request, deploy, pendingDeploy, updatePendingDeployRequest);
    }

    final boolean overdue = isDeployOverdue(pendingDeploy, deploy);

    // Not enough tasks of this deploy are active yet for the current step.
    final boolean belowTarget = deployActiveTasks.size() < progress.getTargetActiveInstances();
    if (belowTarget) {
        maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
        return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, overdue);
    }

    if (shouldCheckLbState(pendingDeploy)) {
        final SingularityLoadBalancerUpdate currentLbState = lbClient.getState(getLoadBalancerRequestId(pendingDeploy));
        return processLbState(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks, tasksToShutDown(progress, otherActiveTasks, request), currentLbState);
    }

    if (overdue && request.isLoadBalanced() && shouldCancelLoadBalancer(pendingDeploy)) {
        return cancelLoadBalancer(pendingDeploy, getDeployFailures(request, deploy, pendingDeploy, DeployState.OVERDUE, deployActiveTasks));
    }

    if (isWaitingForCurrentLbRequest(pendingDeploy)) {
        return new SingularityDeployResult(DeployState.WAITING, Optional.of("Waiting on load balancer API"), pendingDeploy.getLastLoadBalancerUpdate());
    }

    final DeployHealth health = deployHealthHelper.getDeployHealth(request, deploy, deployActiveTasks, true);
    switch (health) {
        case WAITING: {
            maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
            return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, overdue);
        }
        case HEALTHY: {
            if (!request.isLoadBalanced()) {
                return markStepFinished(pendingDeploy, deploy, deployActiveTasks, otherActiveTasks, request, updatePendingDeployRequest);
            }
            // The requested target differs from the current step's target: apply the update and wait.
            if (updatePendingDeployRequest.isPresent() && updatePendingDeployRequest.get().getTargetActiveInstances() != progress.getTargetActiveInstances()) {
                maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
                return new SingularityDeployResult(DeployState.WAITING);
            }
            if (configuration.getLoadBalancerUri() == null) {
                LOG.warn("Deploy {} required a load balancer URI but it wasn't set", pendingDeploy);
                return new SingularityDeployResult(DeployState.FAILED, Optional.of("No valid load balancer URI was present"), Optional.<SingularityLoadBalancerUpdate>absent(), Collections.<SingularityDeployFailure>emptyList(), System.currentTimeMillis());
            }
            for (SingularityTaskId healthyTaskId : deployActiveTasks) {
                taskManager.markHealthchecksFinished(healthyTaskId);
                taskManager.clearStartupHealthchecks(healthyTaskId);
            }
            return enqueueAndProcessLbRequest(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks);
        }
        case UNHEALTHY:
        default: {
            // Any other health value is treated as a failed deploy.
            for (SingularityTaskId failedTaskId : deployActiveTasks) {
                taskManager.markHealthchecksFinished(failedTaskId);
                taskManager.clearStartupHealthchecks(failedTaskId);
            }
            return getDeployResultWithFailures(request, deploy, pendingDeploy, DeployState.FAILED, "Not all tasks for deploy were healthy", deployActiveTasks);
        }
    }
}
Also used : SingularityLoadBalancerUpdate(com.hubspot.singularity.SingularityLoadBalancerUpdate) SingularityDeployResult(com.hubspot.singularity.SingularityDeployResult) DeployHealth(com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth) SingularityDeployProgress(com.hubspot.singularity.SingularityDeployProgress) SingularityTaskId(com.hubspot.singularity.SingularityTaskId)

Example 2 with DeployHealth

use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.

In the class SingularityCleaner, the method getNumHealthyTasks:

/**
 * Counts the tasks that are healthy both according to the deploy health helper and
 * according to their load balancer state.
 *
 * @param request the request the tasks belong to
 * @param deployId id of the deploy to look up for the health evaluation
 * @param matchingTasks candidate tasks to evaluate
 * @return the number of candidates whose load balancer health is {@code HEALTHY}
 *         among those the health helper reports as healthy
 */
private int getNumHealthyTasks(SingularityRequest request, String deployId, List<SingularityTaskId> matchingTasks) {
    final Optional<SingularityDeploy> maybeDeploy = deployManager.getDeploy(request.getId(), deployId);
    final List<SingularityTaskId> candidates = deployHealthHelper.getHealthyTasks(request, maybeDeploy, matchingTasks, false);

    int healthyCount = 0;
    for (SingularityTaskId candidate : candidates) {
        // Only tasks that are also healthy in the load balancer are counted.
        if (getLbHealth(request, candidate) != DeployHealth.HEALTHY) {
            continue;
        }
        healthyCount++;
    }
    return healthyCount;
}
Also used : DeployHealth(com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth) SingularityDeploy(com.hubspot.singularity.SingularityDeploy) SingularityTaskId(com.hubspot.singularity.SingularityTaskId)

Example 3 with DeployHealth

use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.

In the class SingularityNewTaskChecker, the method getTaskState:

/**
 * Determines what the new-task checker should do next for the given task.
 *
 * <p>The decision is made in stages: the task must still be active, its simplified
 * history state must be {@code RUNNING}, its healthcheck (when one applies) must be
 * {@code HEALTHY}, and, for load balanced requests, its load balancer ADD request is
 * enqueued or polled and the resulting state is translated via {@code checkLbState}.
 *
 * @param task the task to evaluate
 * @param requestWithState the task's request and its state, passed to {@code hasHealthcheck}
 * @param healthchecker used to trigger another healthcheck while the health is {@code WAITING}
 * @return the state telling the caller how to proceed with the task
 */
@VisibleForTesting
CheckTaskState getTaskState(SingularityTask task, Optional<SingularityRequestWithState> requestWithState, SingularityHealthchecker healthchecker) {
    if (!taskManager.isActiveTask(task.getTaskId().getId())) {
        return CheckTaskState.OBSOLETE;
    }
    SimplifiedTaskState taskState = SingularityTaskHistoryUpdate.getCurrentState(taskManager.getTaskHistoryUpdates(task.getTaskId()));
    switch(taskState) {
        case DONE:
            return CheckTaskState.OBSOLETE;
        case WAITING:
        case UNKNOWN:
            return CheckTaskState.CHECK_IF_TASK_OVERDUE;
        case RUNNING:
            break;
    }
    if (hasHealthcheck(task, requestWithState)) {
        Optional<SingularityTaskHealthcheckResult> maybeHealthCheck = taskManager.getLastHealthcheck(task.getTaskId());
        DeployHealth health = deployHealthHelper.getTaskHealth(task.getTaskRequest().getDeploy(), false, maybeHealthCheck, task.getTaskId());
        switch(health) {
            case WAITING:
                healthchecker.checkHealthcheck(task);
                return CheckTaskState.CHECK_IF_HEALTHCHECK_OVERDUE;
            case UNHEALTHY:
                taskManager.clearStartupHealthchecks(task.getTaskId());
                return CheckTaskState.UNHEALTHY_KILL_TASK;
            case HEALTHY:
                taskManager.clearStartupHealthchecks(task.getTaskId());
                break;
        }
    }
    // task is running + has succeeded healthcheck if available.
    if (!task.getTaskRequest().getRequest().isLoadBalanced()) {
        return CheckTaskState.HEALTHY;
    }
    Optional<SingularityLoadBalancerUpdate> lbUpdate = taskManager.getLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD);
    SingularityLoadBalancerUpdate newLbUpdate;
    final LoadBalancerRequestId loadBalancerRequestId = new LoadBalancerRequestId(task.getTaskId().getId(), LoadBalancerRequestType.ADD, Optional.absent());
    boolean taskCleaning = taskManager.getCleanupTaskIds().contains(task.getTaskId());
    if ((!lbUpdate.isPresent() || unknownNotRemoving(lbUpdate.get())) && !taskCleaning) {
        // Record a PRE_ENQUEUE marker before the ADD request is actually sent to the load balancer.
        taskManager.saveLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD, new SingularityLoadBalancerUpdate(BaragonRequestState.UNKNOWN, loadBalancerRequestId, Optional.absent(), System.currentTimeMillis(), LoadBalancerMethod.PRE_ENQUEUE, Optional.absent()));
        newLbUpdate = lbClient.enqueue(loadBalancerRequestId, task.getTaskRequest().getRequest(), task.getTaskRequest().getDeploy(), Collections.singletonList(task), Collections.emptyList());
    } else {
        // This branch is also reached when no ADD state was ever stored but the task is
        // cleaning, so the stored state may only be consulted when it is present;
        // calling get() on an absent Optional would throw IllegalStateException.
        if (lbUpdate.isPresent()) {
            Optional<CheckTaskState> storedCheckTaskState = checkLbState(lbUpdate.get().getLoadBalancerState());
            if (storedCheckTaskState.isPresent()) {
                return storedCheckTaskState.get();
            }
        }
        newLbUpdate = lbClient.getState(loadBalancerRequestId);
    }
    taskManager.saveLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD, newLbUpdate);
    Optional<CheckTaskState> maybeCheckTaskState = checkLbState(newLbUpdate.getLoadBalancerState());
    if (maybeCheckTaskState.isPresent()) {
        return maybeCheckTaskState.get();
    }
    return CheckTaskState.LB_IN_PROGRESS_CHECK_AGAIN;
}
Also used : SingularityLoadBalancerUpdate(com.hubspot.singularity.SingularityLoadBalancerUpdate) SingularityTaskHealthcheckResult(com.hubspot.singularity.SingularityTaskHealthcheckResult) LoadBalancerRequestId(com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId) SimplifiedTaskState(com.hubspot.singularity.SingularityTaskHistoryUpdate.SimplifiedTaskState) DeployHealth(com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with DeployHealth

use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.

In the class SingularityCleaner, the method shouldKillTask:

/**
 * Decides whether a task that is being cleaned up may be killed now.
 *
 * <p>The task is killed immediately when its request is missing, when its cleanup type
 * demands an instant kill, when the request is paused (subject to the pausing rule below),
 * or when there is no active deploy. Non-long-running tasks and decommissioning tasks of a
 * pending deploy are killed once a maximum wait time has elapsed. Otherwise the task is
 * only killed once enough replacement tasks exist and they are healthy, both by deploy
 * health and by load balancer health.
 *
 * @param taskCleanup the cleanup entry of the task in question
 * @param activeTaskIds all currently active task ids
 * @param cleaningTasks ids of tasks that are themselves being cleaned; never counted as replacements
 * @param incrementalCleaningTasks per-deploy counts passed through to the incremental bounce and
 *        incremental deploy cleanup decisions
 * @return {@code true} if the task should be killed now, {@code false} to keep waiting
 */
private boolean shouldKillTask(SingularityTaskCleanup taskCleanup, List<SingularityTaskId> activeTaskIds, Set<SingularityTaskId> cleaningTasks, Multiset<SingularityDeployKey> incrementalCleaningTasks) {
    final Optional<SingularityRequestWithState> requestWithState = requestManager.getRequest(taskCleanup.getTaskId().getRequestId());
    if (!requestWithState.isPresent()) {
        LOG.debug("Killing a task {} immediately because the request was missing", taskCleanup);
        return true;
    }
    final SingularityRequest request = requestWithState.get().getRequest();
    // A pre-kill shell command, when configured, must have finished before the task may be killed.
    if (taskCleanup.getRunBeforeKillId().isPresent()) {
        List<SingularityTaskShellCommandUpdate> shellCommandUpdates = taskManager.getTaskShellCommandUpdates(taskCleanup.getRunBeforeKillId().get());
        boolean finished = false;
        for (SingularityTaskShellCommandUpdate update : shellCommandUpdates) {
            if (update.getUpdateType().isFinished()) {
                finished = true;
                break;
            }
        }
        if (!finished) {
            LOG.debug("Waiting for pre-kill shell command {} to finish before killing task", taskCleanup.getRunBeforeKillId());
            return false;
        }
    }
    if (taskCleanup.getCleanupType().shouldKillTaskInstantly(request)) {
        LOG.debug("Killing a task {} immediately because of its cleanup type", taskCleanup);
        return true;
    }
    // If pausing, must be a long-running task to kill here
    if (requestWithState.get().getState() == RequestState.PAUSED && (!(taskCleanup.getCleanupType() == TaskCleanupType.PAUSING) || request.isLongRunning())) {
        LOG.debug("Killing a task {} immediately because the request was paused", taskCleanup);
        return true;
    }
    if (!request.isLongRunning()) {
        final long timeSinceCleanup = System.currentTimeMillis() - taskCleanup.getTimestamp();
        final long maxWaitTime = request.getKillOldNonLongRunningTasksAfterMillis().or(killNonLongRunningTasksInCleanupAfterMillis);
        // A max wait time below 1 means the task is not given any time to finish on its own.
        final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);
        if (!tooOld) {
            LOG.trace("Not killing a non-longRunning task {}, running time since cleanup {} (max wait time is {})", taskCleanup, timeSinceCleanup, maxWaitTime);
        } else {
            LOG.debug("Killing a non-longRunning task {} - running time since cleanup {} exceeded max wait time {}", taskCleanup, timeSinceCleanup, maxWaitTime);
        }
        return tooOld;
    }
    final String requestId = request.getId();
    final Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(requestId);
    // The deploy state may be absent; it must be checked before it is dereferenced so that a
    // decommissioning task without deploy state falls through to the "no active deploy" branch
    // below instead of failing on Optional.get().
    if (taskCleanup.getCleanupType() == TaskCleanupType.DECOMISSIONING && deployState.isPresent() && deployState.get().getPendingDeploy().isPresent() && deployState.get().getPendingDeploy().get().getDeployId().equals(taskCleanup.getTaskId().getDeployId())) {
        final long timeSinceCleanup = System.currentTimeMillis() - taskCleanup.getTimestamp();
        final long maxWaitTime = configuration.getPendingDeployHoldTaskDuringDecommissionMillis();
        final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);
        if (!tooOld) {
            LOG.trace("Not killing {} - part of pending deploy - running time since cleanup {} (max wait time is {})", taskCleanup, timeSinceCleanup, maxWaitTime);
            return false;
        } else {
            LOG.debug("Killing {} - part of pending deploy but running time since cleanup {} exceeded max wait time {}", taskCleanup, timeSinceCleanup, maxWaitTime);
            return true;
        }
    }
    if (!deployState.isPresent() || !deployState.get().getActiveDeploy().isPresent()) {
        LOG.debug("Killing a task {} immediately because there is no active deploy state {}", taskCleanup, deployState);
        return true;
    }
    final String activeDeployId = deployState.get().getActiveDeploy().get().getDeployId();
    // After a cancelled or failed incremental deploy the replacements are the active deploy's tasks;
    // in every other case they are tasks of the cleaned task's own deploy.
    final String matchingTasksDeployId = taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_DEPLOY_CANCELLED || taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_DEPLOY_FAILED ? activeDeployId : taskCleanup.getTaskId().getDeployId();
    // check to see if there are enough active tasks out there that have been active for long enough that we can safely shut this task down.
    final List<SingularityTaskId> matchingTasks = new ArrayList<>();
    for (SingularityTaskId taskId : activeTaskIds) {
        if (!taskId.getRequestId().equals(requestId) || !taskId.getDeployId().equals(matchingTasksDeployId)) {
            continue;
        }
        if (cleaningTasks.contains(taskId)) {
            continue;
        }
        matchingTasks.add(taskId);
    }
    // For an incremental bounce or incremental deploy cleanup, shut down old tasks as new ones are started
    final SingularityDeployKey key = SingularityDeployKey.fromTaskId(taskCleanup.getTaskId());
    if (taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_BOUNCE) {
        return shouldKillIncrementalBounceTask(request, taskCleanup, matchingTasksDeployId, matchingTasks, key, incrementalCleaningTasks);
    } else if (isIncrementalDeployCleanup(taskCleanup)) {
        return shouldKillIncrementalDeployCleanupTask(request, taskCleanup, matchingTasksDeployId, matchingTasks, key, incrementalCleaningTasks);
    } else {
        if (matchingTasks.size() < request.getInstancesSafe()) {
            LOG.trace("Not killing a task {} yet, only {} matching out of a required {}", taskCleanup, matchingTasks.size(), request.getInstancesSafe());
            return false;
        }
    }
    final Optional<SingularityDeploy> deploy = deployManager.getDeploy(requestId, activeDeployId);
    final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(request, deploy, matchingTasks, false);
    switch(deployHealth) {
        case HEALTHY:
            // Every replacement must also be healthy in the load balancer before this task goes away.
            for (SingularityTaskId taskId : matchingTasks) {
                DeployHealth lbHealth = getLbHealth(request, taskId);
                if (lbHealth != DeployHealth.HEALTHY) {
                    LOG.trace("Not killing a task {}, waiting for new replacement tasks to be added to LB (current state: {})", taskCleanup, lbHealth);
                    return false;
                }
            }
            LOG.debug("Killing a task {}, all replacement tasks are healthy", taskCleanup);
            return true;
        case WAITING:
        case UNHEALTHY:
        default:
            LOG.trace("Not killing a task {}, waiting for new replacement tasks to be healthy (current state: {})", taskCleanup, deployHealth);
            return false;
    }
}
Also used : SingularityRequest(com.hubspot.singularity.SingularityRequest) ArrayList(java.util.ArrayList) SingularityRequestDeployState(com.hubspot.singularity.SingularityRequestDeployState) SingularityDeploy(com.hubspot.singularity.SingularityDeploy) SingularityDeployKey(com.hubspot.singularity.SingularityDeployKey) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) SingularityTaskShellCommandUpdate(com.hubspot.singularity.SingularityTaskShellCommandUpdate) DeployHealth(com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth) SingularityTaskId(com.hubspot.singularity.SingularityTaskId)

Aggregations

- DeployHealth (com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth): 4
- SingularityTaskId (com.hubspot.singularity.SingularityTaskId): 3
- SingularityDeploy (com.hubspot.singularity.SingularityDeploy): 2
- SingularityLoadBalancerUpdate (com.hubspot.singularity.SingularityLoadBalancerUpdate): 2
- VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
- LoadBalancerRequestId (com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId): 1
- SingularityDeployKey (com.hubspot.singularity.SingularityDeployKey): 1
- SingularityDeployProgress (com.hubspot.singularity.SingularityDeployProgress): 1
- SingularityDeployResult (com.hubspot.singularity.SingularityDeployResult): 1
- SingularityRequest (com.hubspot.singularity.SingularityRequest): 1
- SingularityRequestDeployState (com.hubspot.singularity.SingularityRequestDeployState): 1
- SingularityRequestWithState (com.hubspot.singularity.SingularityRequestWithState): 1
- SingularityTaskHealthcheckResult (com.hubspot.singularity.SingularityTaskHealthcheckResult): 1
- SimplifiedTaskState (com.hubspot.singularity.SingularityTaskHistoryUpdate.SimplifiedTaskState): 1
- SingularityTaskShellCommandUpdate (com.hubspot.singularity.SingularityTaskShellCommandUpdate): 1
- ArrayList (java.util.ArrayList): 1