use of com.hubspot.singularity.SingularityDeployFailure in project Singularity by HubSpot.
the class SingularityDeployHealthHelper method getTaskFailure.
/**
 * Evaluates a single task for a deploy failure.
 *
 * <p>Looks up the task's entry in {@code healthcheckResults}. When an entry exists the
 * decision is delegated to {@code getHealthcheckedTaskFailure}; when there is none it is
 * delegated to {@code getNonHealthcheckedTaskFailure}, which only looks at task history.
 *
 * @param deploy the deploy being checked (only used on the healthchecked path)
 * @param taskUpdates task history updates keyed by task id
 * @param healthcheckResults healthcheck results keyed by task id; a missing key selects the non-healthchecked path
 * @param taskId the task to evaluate
 * @return the failure reported by the selected helper, or absent when it reports none
 */
private Optional<SingularityDeployFailure> getTaskFailure(SingularityDeploy deploy, Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates, Map<SingularityTaskId, SingularityTaskHealthcheckResult> healthcheckResults, SingularityTaskId taskId) {
  final SingularityTaskHealthcheckResult lastHealthcheck = healthcheckResults.get(taskId);

  if (lastHealthcheck != null) {
    return getHealthcheckedTaskFailure(deploy, taskUpdates, lastHealthcheck, taskId);
  }

  // No healthcheck recorded for this task: judge it purely on its history updates.
  return getNonHealthcheckedTaskFailure(taskUpdates, taskId);
}
use of com.hubspot.singularity.SingularityDeployFailure in project Singularity by HubSpot.
the class SingularityDeployHealthHelper method getTaskFailures.
/**
 * Collects the deploy failures found across the given active tasks.
 *
 * <p>History updates and the last healthcheck for all tasks are fetched from the task
 * manager up front, then each task is evaluated individually via {@code getTaskFailure}.
 *
 * <p>Note: {@code deploy.get()} is invoked for every task without a presence check, so the
 * deploy has to be present whenever {@code activeTasks} is non-empty.
 *
 * @param deploy the deploy under evaluation
 * @param activeTasks the tasks to inspect
 * @return a new list holding one failure per task that reported one, in iteration order of {@code activeTasks}
 */
public List<SingularityDeployFailure> getTaskFailures(final Optional<SingularityDeploy> deploy, final Collection<SingularityTaskId> activeTasks) {
  final Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> updatesByTask = taskManager.getTaskHistoryUpdates(activeTasks);
  final Map<SingularityTaskId, SingularityTaskHealthcheckResult> healthchecksByTask = taskManager.getLastHealthcheck(activeTasks);

  final List<SingularityDeployFailure> collected = new ArrayList<>();
  for (SingularityTaskId activeTask : activeTasks) {
    final Optional<SingularityDeployFailure> failure = getTaskFailure(deploy.get(), updatesByTask, healthchecksByTask, activeTask);
    if (!failure.isPresent()) {
      continue;
    }
    collected.add(failure.get());
  }
  return collected;
}
use of com.hubspot.singularity.SingularityDeployFailure in project Singularity by HubSpot.
the class SingularityDeployChecker method getDeployFailures.
/**
 * Builds the list of failures for a deploy.
 *
 * <p>Starts from the per-task failures reported by {@code deployHealthHelper}. If the deploy
 * state is {@code OVERDUE}, no task failure was found, and fewer tasks match than the target
 * instance count, a single {@code TASK_COULD_NOT_BE_SCHEDULED} failure is added instead.
 *
 * @param request the request; supplies the instance count when the pending deploy has no progress
 * @param deploy the deploy being checked
 * @param pendingDeploy the pending deploy; its progress, when present, supplies the target instance count
 * @param state the current deploy state
 * @param matchingTasks the tasks that belong to this deploy
 * @return a new mutable list of failures, possibly empty
 */
private List<SingularityDeployFailure> getDeployFailures(SingularityRequest request, Optional<SingularityDeploy> deploy, SingularityPendingDeploy pendingDeploy, DeployState state, Collection<SingularityTaskId> matchingTasks) {
  final List<SingularityDeployFailure> failures = new ArrayList<>(deployHealthHelper.getTaskFailures(deploy, matchingTasks));

  if (state != DeployState.OVERDUE) {
    return failures;
  }

  final int targetInstances;
  if (pendingDeploy.getDeployProgress().isPresent()) {
    targetInstances = pendingDeploy.getDeployProgress().get().getTargetActiveInstances();
  } else {
    targetInstances = request.getInstancesSafe();
  }

  // Only report a scheduling problem when nothing more specific was found.
  if (failures.isEmpty() && matchingTasks.size() < targetInstances) {
    final String message = String.format("Only %s of %s tasks could be launched for deploy, there may not be enough resources to launch the remaining tasks", matchingTasks.size(), targetInstances);
    failures.add(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_COULD_NOT_BE_SCHEDULED, Optional.<SingularityTaskId>absent(), Optional.of(message)));
  }
  return failures;
}
use of com.hubspot.singularity.SingularityDeployFailure in project Singularity by HubSpot.
the class SingularityDeployHealthHelper method getNonHealthcheckedTaskFailure.
/**
 * Decides, from task history alone, whether a task without a healthcheck result has failed.
 *
 * <p>The last history update is checked in this order:
 * <ol>
 *   <li>its state reports {@code isSuccess()}: {@code TASK_EXPECTED_RUNNING_FINISHED}</li>
 *   <li>its state reports {@code isDone()}: {@code TASK_FAILED_ON_STARTUP}</li>
 *   <li>the current simplified state of the whole history is {@code WAITING}: {@code TASK_NEVER_ENTERED_RUNNING}</li>
 * </ol>
 *
 * <p>Note: the history for {@code taskId} is read without a null/empty check before
 * {@code Iterables.getLast}.
 *
 * @param taskUpdates task history updates keyed by task id
 * @param taskId the task to evaluate
 * @return the matching failure, or absent when none of the conditions above hold
 */
private Optional<SingularityDeployFailure> getNonHealthcheckedTaskFailure(Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates, SingularityTaskId taskId) {
  final List<SingularityTaskHistoryUpdate> history = taskUpdates.get(taskId);
  final SingularityTaskHistoryUpdate latest = Iterables.getLast(history);

  if (latest.getTaskState().isSuccess()) {
    final String finishedMessage = String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", latest.getStatusMessage().or(""));
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId), Optional.of(finishedMessage)));
  }

  if (latest.getTaskState().isDone()) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_ON_STARTUP, Optional.of(taskId), latest.getStatusMessage()));
  }

  if (SingularityTaskHistoryUpdate.getCurrentState(history) == SimplifiedTaskState.WAITING) {
    final String neverRanMessage = String.format("Task never entered running state, last state was %s (%s)", latest.getTaskState().getDisplayName(), latest.getStatusMessage().or(""));
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_NEVER_ENTERED_RUNNING, Optional.of(taskId), Optional.of(neverRanMessage)));
  }

  return Optional.absent();
}
use of com.hubspot.singularity.SingularityDeployFailure in project Singularity by HubSpot.
the class SingularityDeployHealthHelper method getHealthcheckedTaskFailure.
/**
 * Decides whether a task that has a recorded healthcheck result counts as a deploy failure.
 *
 * <p>A healthcheck result that is not failed never produces a failure. Otherwise the checks
 * run in this order:
 * <ol>
 *   <li>the last history update is in a done state: {@code TASK_EXPECTED_RUNNING_FINISHED} if that
 *       state is a success, {@code TASK_FAILED_ON_STARTUP} if not</li>
 *   <li>a max-retries value is configured (deploy healthcheck options first, then global
 *       configuration) and the number of non-startup healthchecks exceeds it:
 *       {@code TASK_FAILED_HEALTH_CHECKS}</li>
 *   <li>the task has a known running timestamp and either the startup timeout or the
 *       running-longer-than-threshold check has been exceeded: {@code TASK_FAILED_HEALTH_CHECKS}</li>
 * </ol>
 *
 * @param deploy the deploy whose healthcheck options are consulted
 * @param taskUpdates task history updates keyed by task id
 * @param healthcheckResult the last healthcheck result recorded for the task
 * @param taskId the task to evaluate
 * @return the matching failure, or absent when the task is not (yet) considered failed
 */
private Optional<SingularityDeployFailure> getHealthcheckedTaskFailure(SingularityDeploy deploy, Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates, SingularityTaskHealthcheckResult healthcheckResult, SingularityTaskId taskId) {
  Collection<SingularityTaskHistoryUpdate> updates = taskUpdates.get(taskId);

  if (!healthcheckResult.isFailed()) {
    return Optional.absent();
  }

  SingularityTaskHistoryUpdate lastUpdate = Iterables.getLast(updates);
  if (lastUpdate.getTaskState().isDone()) {
    if (lastUpdate.getTaskState().isSuccess()) {
      return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId), Optional.of(String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", lastUpdate.getStatusMessage().or("")))));
    } else {
      return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_ON_STARTUP, Optional.of(taskId), lastUpdate.getStatusMessage()));
    }
  }

  // Deploy-level healthcheck options take precedence over the global configuration.
  final Optional<Integer> healthcheckMaxRetries = deploy.getHealthcheck().isPresent() ? deploy.getHealthcheck().get().getMaxRetries().or(configuration.getHealthcheckMaxRetries()) : configuration.getHealthcheckMaxRetries();
  if (healthcheckMaxRetries.isPresent() && taskManager.getNumNonstartupHealthchecks(taskId) > healthcheckMaxRetries.get()) {
    // The reported count is maxRetries + 1, matching the strict '>' comparison above.
    String message = String.format("Instance %s failed %s healthchecks, the max for the deploy.", taskId.getInstanceNo(), healthcheckMaxRetries.get() + 1);
    if (healthcheckResult.getStatusCode().isPresent()) {
      message = String.format("%s Last check returned with status code %s", message, healthcheckResult.getStatusCode().get());
    }
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_HEALTH_CHECKS, Optional.of(taskId), Optional.of(message)));
  }

  Optional<Long> runningAt = getRunningAt(updates);
  if (runningAt.isPresent()) {
    final long durationSinceRunning = System.currentTimeMillis() - runningAt.get();

    if (healthcheckResult.isStartup() && deploy.getHealthcheck().isPresent()) {
      // durationSinceRunning is in milliseconds while the startup timeout is configured in
      // seconds; convert before comparing so the timeout is not hit ~1000x too early.
      final long startupTimeoutMillis = java.util.concurrent.TimeUnit.SECONDS.toMillis(deploy.getHealthcheck().get().getStartupTimeoutSeconds().or(configuration.getStartupTimeoutSeconds()));
      if (durationSinceRunning > startupTimeoutMillis) {
        String message = String.format("Instance %s has not responded to healthchecks after running for %s", taskId.getInstanceNo(), JavaUtils.durationFromMillis(durationSinceRunning));
        return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_HEALTH_CHECKS, Optional.of(taskId), Optional.of(message)));
      }
    }

    if (isRunningLongerThanThreshold(deploy, durationSinceRunning)) {
      String message = String.format("Instance %s has been running for %s and has yet to pass healthchecks.", taskId.getInstanceNo(), JavaUtils.durationFromMillis(durationSinceRunning));
      if (healthcheckResult.getStatusCode().isPresent()) {
        message = String.format("%s Last check returned with status code %s", message, healthcheckResult.getStatusCode().get());
      }
      return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_HEALTH_CHECKS, Optional.of(taskId), Optional.of(message)));
    }
  }

  return Optional.absent();
}
Aggregations