use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.
the class SingularityDeployChecker method checkDeployProgress.
private SingularityDeployResult checkDeployProgress(final SingularityRequest request, final Optional<SingularityDeployMarker> cancelRequest, final SingularityPendingDeploy pendingDeploy, final Optional<SingularityUpdatePendingDeployRequest> updatePendingDeployRequest, final Optional<SingularityDeploy> deploy, final Collection<SingularityTaskId> deployActiveTasks, final Collection<SingularityTaskId> otherActiveTasks) {
SingularityDeployProgress deployProgress = pendingDeploy.getDeployProgress().get();
if (cancelRequest.isPresent()) {
LOG.info("Canceling a deploy {} due to cancel request {}", pendingDeploy, cancelRequest.get());
String userMessage = cancelRequest.get().getUser().isPresent() ? String.format(" by %s", cancelRequest.get().getUser().get()) : "";
return new SingularityDeployResult(DeployState.CANCELED, Optional.of(String.format("Canceled due to request%s at %s", userMessage, cancelRequest.get().getTimestamp())), pendingDeploy.getLastLoadBalancerUpdate(), Collections.<SingularityDeployFailure>emptyList(), System.currentTimeMillis());
}
if (deployProgress.isStepComplete()) {
return checkCanMoveToNextDeployStep(request, deploy, pendingDeploy, updatePendingDeployRequest);
}
final boolean isDeployOverdue = isDeployOverdue(pendingDeploy, deploy);
if (deployActiveTasks.size() < deployProgress.getTargetActiveInstances()) {
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, isDeployOverdue);
}
if (shouldCheckLbState(pendingDeploy)) {
final SingularityLoadBalancerUpdate lbUpdate = lbClient.getState(getLoadBalancerRequestId(pendingDeploy));
return processLbState(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks, tasksToShutDown(deployProgress, otherActiveTasks, request), lbUpdate);
}
if (isDeployOverdue && request.isLoadBalanced() && shouldCancelLoadBalancer(pendingDeploy)) {
return cancelLoadBalancer(pendingDeploy, getDeployFailures(request, deploy, pendingDeploy, DeployState.OVERDUE, deployActiveTasks));
}
if (isWaitingForCurrentLbRequest(pendingDeploy)) {
return new SingularityDeployResult(DeployState.WAITING, Optional.of("Waiting on load balancer API"), pendingDeploy.getLastLoadBalancerUpdate());
}
final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(request, deploy, deployActiveTasks, true);
switch(deployHealth) {
case WAITING:
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, isDeployOverdue);
case HEALTHY:
if (!request.isLoadBalanced()) {
return markStepFinished(pendingDeploy, deploy, deployActiveTasks, otherActiveTasks, request, updatePendingDeployRequest);
}
if (updatePendingDeployRequest.isPresent() && updatePendingDeployRequest.get().getTargetActiveInstances() != deployProgress.getTargetActiveInstances()) {
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return new SingularityDeployResult(DeployState.WAITING);
}
if (configuration.getLoadBalancerUri() == null) {
LOG.warn("Deploy {} required a load balancer URI but it wasn't set", pendingDeploy);
return new SingularityDeployResult(DeployState.FAILED, Optional.of("No valid load balancer URI was present"), Optional.<SingularityLoadBalancerUpdate>absent(), Collections.<SingularityDeployFailure>emptyList(), System.currentTimeMillis());
}
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return enqueueAndProcessLbRequest(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks);
case UNHEALTHY:
default:
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return getDeployResultWithFailures(request, deploy, pendingDeploy, DeployState.FAILED, "Not all tasks for deploy were healthy", deployActiveTasks);
}
}
use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.
the class SingularityCleaner method getNumHealthyTasks.
private int getNumHealthyTasks(SingularityRequest request, String deployId, List<SingularityTaskId> matchingTasks) {
Optional<SingularityDeploy> deploy = deployManager.getDeploy(request.getId(), deployId);
List<SingularityTaskId> healthyTasks = deployHealthHelper.getHealthyTasks(request, deploy, matchingTasks, false);
int numHealthyTasks = 0;
for (SingularityTaskId taskId : healthyTasks) {
DeployHealth lbHealth = getLbHealth(request, taskId);
if (lbHealth == DeployHealth.HEALTHY) {
numHealthyTasks++;
}
}
return numHealthyTasks;
}
use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.
the class SingularityNewTaskChecker method getTaskState.
@VisibleForTesting
CheckTaskState getTaskState(SingularityTask task, Optional<SingularityRequestWithState> requestWithState, SingularityHealthchecker healthchecker) {
if (!taskManager.isActiveTask(task.getTaskId().getId())) {
return CheckTaskState.OBSOLETE;
}
SimplifiedTaskState taskState = SingularityTaskHistoryUpdate.getCurrentState(taskManager.getTaskHistoryUpdates(task.getTaskId()));
switch(taskState) {
case DONE:
return CheckTaskState.OBSOLETE;
case WAITING:
case UNKNOWN:
return CheckTaskState.CHECK_IF_TASK_OVERDUE;
case RUNNING:
break;
}
if (hasHealthcheck(task, requestWithState)) {
Optional<SingularityTaskHealthcheckResult> maybeHealthCheck = taskManager.getLastHealthcheck(task.getTaskId());
DeployHealth health = deployHealthHelper.getTaskHealth(task.getTaskRequest().getDeploy(), false, maybeHealthCheck, task.getTaskId());
switch(health) {
case WAITING:
healthchecker.checkHealthcheck(task);
return CheckTaskState.CHECK_IF_HEALTHCHECK_OVERDUE;
case UNHEALTHY:
taskManager.clearStartupHealthchecks(task.getTaskId());
return CheckTaskState.UNHEALTHY_KILL_TASK;
case HEALTHY:
taskManager.clearStartupHealthchecks(task.getTaskId());
break;
}
}
// task is running + has succeeded healthcheck if available.
if (!task.getTaskRequest().getRequest().isLoadBalanced()) {
return CheckTaskState.HEALTHY;
}
Optional<SingularityLoadBalancerUpdate> lbUpdate = taskManager.getLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD);
SingularityLoadBalancerUpdate newLbUpdate;
final LoadBalancerRequestId loadBalancerRequestId = new LoadBalancerRequestId(task.getTaskId().getId(), LoadBalancerRequestType.ADD, Optional.absent());
boolean taskCleaning = taskManager.getCleanupTaskIds().contains(task.getTaskId());
if ((!lbUpdate.isPresent() || unknownNotRemoving(lbUpdate.get())) && !taskCleaning) {
taskManager.saveLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD, new SingularityLoadBalancerUpdate(BaragonRequestState.UNKNOWN, loadBalancerRequestId, Optional.absent(), System.currentTimeMillis(), LoadBalancerMethod.PRE_ENQUEUE, Optional.absent()));
newLbUpdate = lbClient.enqueue(loadBalancerRequestId, task.getTaskRequest().getRequest(), task.getTaskRequest().getDeploy(), Collections.singletonList(task), Collections.emptyList());
} else {
Optional<CheckTaskState> maybeCheckTaskState = checkLbState(lbUpdate.get().getLoadBalancerState());
if (maybeCheckTaskState.isPresent()) {
return maybeCheckTaskState.get();
}
newLbUpdate = lbClient.getState(loadBalancerRequestId);
}
taskManager.saveLoadBalancerState(task.getTaskId(), LoadBalancerRequestType.ADD, newLbUpdate);
Optional<CheckTaskState> maybeCheckTaskState = checkLbState(newLbUpdate.getLoadBalancerState());
if (maybeCheckTaskState.isPresent()) {
return maybeCheckTaskState.get();
}
return CheckTaskState.LB_IN_PROGRESS_CHECK_AGAIN;
}
use of com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth in project Singularity by HubSpot.
the class SingularityCleaner method shouldKillTask.
private boolean shouldKillTask(SingularityTaskCleanup taskCleanup, List<SingularityTaskId> activeTaskIds, Set<SingularityTaskId> cleaningTasks, Multiset<SingularityDeployKey> incrementalCleaningTasks) {
final Optional<SingularityRequestWithState> requestWithState = requestManager.getRequest(taskCleanup.getTaskId().getRequestId());
if (!requestWithState.isPresent()) {
LOG.debug("Killing a task {} immediately because the request was missing", taskCleanup);
return true;
}
final SingularityRequest request = requestWithState.get().getRequest();
if (taskCleanup.getRunBeforeKillId().isPresent()) {
List<SingularityTaskShellCommandUpdate> shellCommandUpdates = taskManager.getTaskShellCommandUpdates(taskCleanup.getRunBeforeKillId().get());
boolean finished = false;
for (SingularityTaskShellCommandUpdate update : shellCommandUpdates) {
if (update.getUpdateType().isFinished()) {
finished = true;
break;
}
}
if (!finished) {
LOG.debug("Waiting for pre-kill shell command {} to finish before killing task", taskCleanup.getRunBeforeKillId());
return false;
}
}
if (taskCleanup.getCleanupType().shouldKillTaskInstantly(request)) {
LOG.debug("Killing a task {} immediately because of its cleanup type", taskCleanup);
return true;
}
// If pausing, must be a long-running task to kill here
if (requestWithState.get().getState() == RequestState.PAUSED && (!(taskCleanup.getCleanupType() == TaskCleanupType.PAUSING) || request.isLongRunning())) {
LOG.debug("Killing a task {} immediately because the request was paused", taskCleanup);
return true;
}
if (!request.isLongRunning()) {
final long timeSinceCleanup = System.currentTimeMillis() - taskCleanup.getTimestamp();
final long maxWaitTime = request.getKillOldNonLongRunningTasksAfterMillis().or(killNonLongRunningTasksInCleanupAfterMillis);
final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);
if (!tooOld) {
LOG.trace("Not killing a non-longRunning task {}, running time since cleanup {} (max wait time is {})", taskCleanup, timeSinceCleanup, maxWaitTime);
} else {
LOG.debug("Killing a non-longRunning task {} - running time since cleanup {} exceeded max wait time {}", taskCleanup, timeSinceCleanup, maxWaitTime);
}
return tooOld;
}
final String requestId = request.getId();
final Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(requestId);
if (taskCleanup.getCleanupType() == TaskCleanupType.DECOMISSIONING && deployState.get().getPendingDeploy().isPresent() && deployState.get().getPendingDeploy().get().getDeployId().equals(taskCleanup.getTaskId().getDeployId())) {
final long timeSinceCleanup = System.currentTimeMillis() - taskCleanup.getTimestamp();
final long maxWaitTime = configuration.getPendingDeployHoldTaskDuringDecommissionMillis();
final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);
if (!tooOld) {
LOG.trace("Not killing {} - part of pending deploy - running time since cleanup {} (max wait time is {})", taskCleanup, timeSinceCleanup, maxWaitTime);
return false;
} else {
LOG.debug("Killing {} - part of pending deploy but running time since cleanup {} exceeded max wait time {}", taskCleanup, timeSinceCleanup, maxWaitTime);
return true;
}
}
if (!deployState.isPresent() || !deployState.get().getActiveDeploy().isPresent()) {
LOG.debug("Killing a task {} immediately because there is no active deploy state {}", taskCleanup, deployState);
return true;
}
final String activeDeployId = deployState.get().getActiveDeploy().get().getDeployId();
final String matchingTasksDeployId = taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_DEPLOY_CANCELLED || taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_DEPLOY_FAILED ? activeDeployId : taskCleanup.getTaskId().getDeployId();
// check to see if there are enough active tasks out there that have been active for long enough that we can safely shut this task down.
final List<SingularityTaskId> matchingTasks = new ArrayList<>();
for (SingularityTaskId taskId : activeTaskIds) {
if (!taskId.getRequestId().equals(requestId) || !taskId.getDeployId().equals(matchingTasksDeployId)) {
continue;
}
if (cleaningTasks.contains(taskId)) {
continue;
}
matchingTasks.add(taskId);
}
// For an incremental bounce or incremental deploy cleanup, shut down old tasks as new ones are started
final SingularityDeployKey key = SingularityDeployKey.fromTaskId(taskCleanup.getTaskId());
if (taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_BOUNCE) {
return shouldKillIncrementalBounceTask(request, taskCleanup, matchingTasksDeployId, matchingTasks, key, incrementalCleaningTasks);
} else if (isIncrementalDeployCleanup(taskCleanup)) {
return shouldKillIncrementalDeployCleanupTask(request, taskCleanup, matchingTasksDeployId, matchingTasks, key, incrementalCleaningTasks);
} else {
if (matchingTasks.size() < request.getInstancesSafe()) {
LOG.trace("Not killing a task {} yet, only {} matching out of a required {}", taskCleanup, matchingTasks.size(), request.getInstancesSafe());
return false;
}
}
final Optional<SingularityDeploy> deploy = deployManager.getDeploy(requestId, activeDeployId);
final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(requestWithState.get().getRequest(), deploy, matchingTasks, false);
switch(deployHealth) {
case HEALTHY:
for (SingularityTaskId taskId : matchingTasks) {
DeployHealth lbHealth = getLbHealth(request, taskId);
if (lbHealth != DeployHealth.HEALTHY) {
LOG.trace("Not killing a task {}, waiting for new replacement tasks to be added to LB (current state: {})", taskCleanup, lbHealth);
return false;
}
}
LOG.debug("Killing a task {}, all replacement tasks are healthy", taskCleanup);
return true;
case WAITING:
case UNHEALTHY:
default:
LOG.trace("Not killing a task {}, waiting for new replacement tasks to be healthy (current state: {})", taskCleanup, deployHealth);
return false;
}
}
Aggregations