Search in sources :

Example 6 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCrashLoops method getActiveCrashLoops.

/**
 * Inspects the task failure events recorded on a deploy and reports every crash loop pattern
 * that currently matches.
 *
 * <p>An empty list is returned without further checks when the deploy has no failure events,
 * when the deploy is the request's pending deploy, or when the request cannot be found.
 * Otherwise the fast-failure, startup-failure, OOM, single-instance and multi-instance checks
 * run for every request; the slow-failure and unexpected-exit checks run only for long-running
 * requests.
 *
 * @param deployStatistics statistics (including task failure events) of the deploy to evaluate
 * @return the crash loops detected, in the order the checks run; possibly empty, never null
 */
List<CrashLoopInfo> getActiveCrashLoops(SingularityDeployStatistics deployStatistics) {
    final List<CrashLoopInfo> loops = new ArrayList<>();
    if (deployStatistics.getTaskFailureEvents().isEmpty()) {
        return loops;
    }

    // A deploy that is still pending is not evaluated at all.
    final Optional<SingularityPendingDeploy> pendingDeploy = deployManager.getPendingDeploy(deployStatistics.getRequestId());
    final boolean deployStillPending = pendingDeploy.isPresent()
        && pendingDeploy.get().getDeployMarker().getDeployId().equals(deployStatistics.getDeployId());
    if (deployStillPending) {
        LOG.debug("Not checking cooldown for pending deploy {} - {}", deployStatistics.getRequestId(), deployStatistics.getDeployId());
        return loops;
    }

    final Optional<SingularityRequestWithState> requestWithState = requestManager.getRequest(deployStatistics.getRequestId());
    if (!requestWithState.isPresent()) {
        return loops;
    }

    final long now = System.currentTimeMillis();

    // Fast failures
    final Optional<Long> fastFailureStart = cooldownStart(deployStatistics, Optional.empty());
    fastFailureStart.ifPresent(start -> loops.add(buildCrashLoopInfo(deployStatistics, start, CrashLoopType.FAST_FAILURE_LOOP)));

    /*
     * Startup failure loop
     * a) small count of failures, but the instance number matches one that is in cleaning state
     *    waiting for a replacement
     */
    // Latest cleanup timestamp per instance number, restricted to this request + deploy.
    final Map<Integer, Long> cleanupStartByInstance = taskManager.getCleanupTasks().stream()
        .filter(cleanup -> cleanup.getTaskId().getRequestId().equals(deployStatistics.getRequestId()))
        .filter(cleanup -> cleanup.getTaskId().getDeployId().equals(deployStatistics.getDeployId()))
        .collect(Collectors.toMap(cleanup -> cleanup.getTaskId().getInstanceNo(), SingularityTaskCleanup::getTimestamp, Math::max));
    // Startup failure timestamps per instance, only for instances that have a cleanup entry.
    final Map<Integer, List<Long>> startupFailuresByInstance = deployStatistics.getTaskFailureEvents().stream()
        .filter(event -> event.getType() == TaskFailureType.STARTUP_FAILURE)
        .filter(event -> cleanupStartByInstance.containsKey(event.getInstance()))
        .collect(Collectors.groupingBy(TaskFailureEvent::getInstance, Collectors.mapping(TaskFailureEvent::getTimestamp, Collectors.toList())));
    boolean hasStartupFailure = false;
    for (Map.Entry<Integer, List<Long>> instanceFailures : startupFailuresByInstance.entrySet()) {
        final Long cleanupStartedAt = cleanupStartByInstance.get(instanceFailures.getKey());
        if (cleanupStartedAt == null) {
            continue;
        }
        final long failuresSinceCleanup = instanceFailures.getValue().stream()
            .filter(failedAt -> failedAt > cleanupStartedAt)
            .count();
        if (failuresSinceCleanup > 2) {
            loops.add(buildCrashLoopInfo(deployStatistics, earliestOf(instanceFailures.getValue()), CrashLoopType.STARTUP_FAILURE_LOOP));
            hasStartupFailure = true;
            // One report of this kind is enough.
            break;
        }
    }

    /*
     * Startup failure loop
     * b) multiple instances failing healthchecks too many times in X minutes
     */
    // NOTE(review): this check only runs when (a) has already reported a startup failure loop,
    // so it can only ever add a second STARTUP_FAILURE_LOOP entry. Confirm whether the guard
    // was meant to be negated.
    if (hasStartupFailure) {
        final long startupWindowStart = now - TimeUnit.MINUTES.toMillis(configuration.getEvaluateStartupLoopOverMinutes());
        final List<Long> startupFailuresInWindow = startupFailuresByInstance.values().stream()
            .flatMap(List::stream)
            .filter(failedAt -> failedAt > startupWindowStart)
            .collect(Collectors.toList());
        if (startupFailuresInWindow.size() > configuration.getStartupFailureThreshold()) {
            loops.add(buildCrashLoopInfo(deployStatistics, earliestOf(startupFailuresInWindow), CrashLoopType.STARTUP_FAILURE_LOOP));
        }
    }

    /*
     * OOM danger: the number of OOMs across all instances in the last Y minutes reaches the
     * configured threshold
     */
    final long oomWindowStart = now - TimeUnit.MINUTES.toMillis(configuration.getEvaluateOomsOverMinutes());
    final List<Long> oomsInWindow = deployStatistics.getTaskFailureEvents().stream()
        .filter(event -> event.getType() == TaskFailureType.OOM && event.getTimestamp() > oomWindowStart)
        .map(TaskFailureEvent::getTimestamp)
        .collect(Collectors.toList());
    if (oomsInWindow.size() >= configuration.getOomFailureThreshold()) {
        loops.add(buildCrashLoopInfo(deployStatistics, earliestOf(oomsInWindow), CrashLoopType.OOM));
    }

    /*
     * Single instance failure: failures for the same instance number evaluated in time buckets
     * Multi instance failure: failures across more than one instance evaluated in time buckets
     */
    final Map<Integer, List<Long>> failuresByInstance = deployStatistics.getTaskFailureEvents().stream()
        .filter(event -> event.getType() == TaskFailureType.OOM || event.getType() == TaskFailureType.BAD_EXIT_CODE || event.getType() == TaskFailureType.OUT_OF_DISK_SPACE)
        .collect(Collectors.groupingBy(TaskFailureEvent::getInstance, Collectors.mapping(TaskFailureEvent::getTimestamp, Collectors.toList())));
    for (Map.Entry<Integer, List<Long>> instanceFailures : failuresByInstance.entrySet()) {
        final Optional<Long> singleInstanceStart = getStartForFailuresInBuckets(now, instanceFailures.getValue(), TimeUnit.MINUTES.toMillis(configuration.getSingleInstanceFailureBucketSizeMinutes()), configuration.getSingleInstanceFailureBuckets(), configuration.getSingleInstanceFailureThreshold(), configuration.getSingleInstanceMinBucketIndexPercent());
        if (singleInstanceStart.isPresent()) {
            loops.add(buildCrashLoopInfo(deployStatistics, singleInstanceStart.get(), CrashLoopType.SINGLE_INSTANCE_FAILURE_LOOP));
            // Only the first matching instance is reported.
            break;
        }
    }

    final List<Long> failuresAcrossInstances = failuresByInstance.values().stream()
        .flatMap(List::stream)
        .collect(Collectors.toList());
    final Optional<Long> multiInstanceStart = getStartForFailuresInBuckets(now, failuresAcrossInstances, TimeUnit.MINUTES.toMillis(configuration.getMultiInstanceFailureBucketSizeMinutes()), configuration.getMultiInstanceFailureBuckets(), configuration.getMultiInstanceFailureThreshold(), configuration.getMultiInstanceMinBucketIndexPercent());
    if (failuresByInstance.size() > 1 && multiInstanceStart.isPresent()) {
        loops.add(buildCrashLoopInfo(deployStatistics, multiInstanceStart.get(), CrashLoopType.MULTI_INSTANCE_FAILURE));
    }

    if (requestWithState.get().getRequest().isLongRunning()) {
        /*
         * Slow failures: occasional failures evaluated in larger buckets, plus the
         * unexpected-exit check
         */
        getStartForFailuresInBuckets(now, failuresByInstance, TimeUnit.MINUTES.toMillis(configuration.getSlowFailureBucketSizeMinutes()), configuration.getSlowFailureBuckets(), configuration.getSlowFailureThreshold(), configuration.getSlowFailureMinBucketIndexPercent())
            .ifPresent(start -> loops.add(buildCrashLoopInfo(deployStatistics, start, CrashLoopType.SLOW_FAILURES)));
        getUnexpectedExitLoop(now, deployStatistics).ifPresent(loops::add);
    }
    return loops;
}

// Builds a crash loop record for the given deploy with no end timestamp.
private CrashLoopInfo buildCrashLoopInfo(SingularityDeployStatistics deployStatistics, long start, CrashLoopType type) {
    return new CrashLoopInfo(deployStatistics.getRequestId(), deployStatistics.getDeployId(), start, Optional.empty(), type);
}

// Returns the smallest timestamp in a list; callers only pass lists they have checked to be non-empty.
private static long earliestOf(List<Long> timestamps) {
    return timestamps.stream().min(Comparator.naturalOrder()).get();
}
Also used : SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) IntStream(java.util.stream.IntStream) CrashLoopType(com.hubspot.singularity.CrashLoopType) SingularityRequest(com.hubspot.singularity.SingularityRequest) DeployManager(com.hubspot.singularity.data.DeployManager) TaskFailureType(com.hubspot.singularity.TaskFailureType) Inject(com.google.inject.Inject) RequestManager(com.hubspot.singularity.data.RequestManager) LoggerFactory(org.slf4j.LoggerFactory) Singleton(javax.inject.Singleton) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) ArrayList(java.util.ArrayList) TaskFailureEvent(com.hubspot.singularity.TaskFailureEvent) Map(java.util.Map) TaskManager(com.hubspot.singularity.data.TaskManager) SingularityConfiguration(com.hubspot.singularity.config.SingularityConfiguration) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) CrashLoopConfiguration(com.hubspot.singularity.config.CrashLoopConfiguration) Logger(org.slf4j.Logger) SingularityPendingDeploy(com.hubspot.singularity.SingularityPendingDeploy) Collectors(java.util.stream.Collectors) RequestState(com.hubspot.singularity.RequestState) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) Optional(java.util.Optional) Comparator(java.util.Comparator) CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) ArrayList(java.util.ArrayList) SingularityPendingDeploy(com.hubspot.singularity.SingularityPendingDeploy) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) TaskFailureEvent(com.hubspot.singularity.TaskFailureEvent)

Example 7 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityScheduler method updateDeployStatistics.

/**
 * Folds the outcome of one finished task into the deploy's statistics and saves the result
 * through {@code deployManager}.
 *
 * <p>Updates, in order: the average runtime (only when the state is not failed), the average
 * scheduling delay (only when the task is known), the task count, the last finish time/state,
 * any task failure events derived from the state and status, the success/failure counters and
 * the sequential retry counter. At most 50 failure events are kept.
 *
 * @param deployStatistics current statistics for the deploy; used as the base for the update
 * @param taskId           id of the finished task (supplies start time and instance number)
 * @param task             full task data, if available
 * @param timestamp        time of the status update, in the same unit as {@code taskId.getStartedAt()}
 * @param state            terminal state reported for the task
 * @param scheduleResult   how the follow-up was scheduled, if at all
 * @param status           raw Mesos status, used for its message and reason
 */
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult, Protos.TaskStatus status) {
    final SingularityDeployStatisticsBuilder builder = deployStatistics.toBuilder();
    final long startedAt = taskId.getStartedAt();

    // Running average of runtime; failed tasks do not contribute.
    if (!state.isFailed()) {
        final long runtimeMillis = timestamp - startedAt;
        final long updatedAverageRuntime = builder.getAverageRuntimeMillis().isPresent()
            ? (builder.getAverageRuntimeMillis().get() * builder.getNumTasks() + runtimeMillis) / (builder.getNumTasks() + 1)
            : runtimeMillis;
        builder.setAverageRuntimeMillis(Optional.of(updatedAverageRuntime));
    }

    // Running average of how long after its due time the task actually started.
    if (task.isPresent()) {
        final long dueTime = task.get().getTaskRequest().getPendingTask().getPendingTaskId().getNextRunAt();
        final long delayMillis = startedAt - dueTime;
        final long updatedAverageDelay = builder.getAverageSchedulingDelayMillis().isPresent()
            ? (builder.getAverageSchedulingDelayMillis().get() * builder.getNumTasks() + delayMillis) / (builder.getNumTasks() + 1)
            : delayMillis;
        builder.setAverageSchedulingDelayMillis(Optional.of(updatedAverageDelay));
    }

    builder.setNumTasks(builder.getNumTasks() + 1);

    // Only move the "last finish" marker forward in time.
    final boolean isNewestFinish = !builder.getLastFinishAt().isPresent() || timestamp > builder.getLastFinishAt().get();
    if (isNewestFinish) {
        builder.setLastFinishAt(Optional.of(timestamp));
        builder.setLastTaskState(Optional.of(state));
    }

    // A long-running task that finishes is recorded as an unexpected exit.
    final boolean longRunningTaskFinished = task.isPresent()
        && task.get().getTaskRequest().getRequest().isLongRunning()
        && state == ExtendedTaskState.TASK_FINISHED;
    if (longRunningTaskFinished) {
        builder.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.UNEXPECTED_EXIT));
    }

    // A new task killed as overdue or unhealthy is recorded as a startup failure.
    if (state == ExtendedTaskState.TASK_KILLED && status.hasMessage()) {
        final Optional<TaskCleanupType> cleanupType = getCleanupType(taskId, status.getMessage());
        final boolean killedDuringStartup = cleanupType.isPresent()
            && (cleanupType.get() == TaskCleanupType.OVERDUE_NEW_TASK || cleanupType.get() == TaskCleanupType.UNHEALTHY_NEW_TASK);
        if (killedDuringStartup) {
            builder.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.STARTUP_FAILURE));
        }
    }

    if (state.isSuccess()) {
        builder.setNumSuccess(builder.getNumSuccess() + 1);
        builder.setNumFailures(0);
    } else if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) {
        // The task was already being cleaned up; its exit leaves the counters and failure events untouched.
        LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown/crash loop", taskId, state);
    } else {
        if (state.isFailed()) {
            final boolean outOfMemory = (status.hasMessage() && status.getMessage().contains("Memory limit exceeded"))
                || (status.hasReason() && status.getReason() == Reason.REASON_CONTAINER_LIMITATION_MEMORY);
            final TaskFailureType failureType;
            if (outOfMemory) {
                failureType = TaskFailureType.OOM;
            } else if (status.hasReason() && status.getReason() == Reason.REASON_CONTAINER_LIMITATION_DISK) {
                failureType = TaskFailureType.OUT_OF_DISK_SPACE;
            } else {
                failureType = TaskFailureType.BAD_EXIT_CODE;
            }
            builder.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, failureType));
        }
        if (state == ExtendedTaskState.TASK_LOST && status.hasReason()) {
            if (isMesosError(status.getReason())) {
                builder.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.MESOS_ERROR));
            } else if (isLostAgent(status.getReason())) {
                builder.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.LOST_SLAVE));
            }
        }
        builder.setNumSuccess(0);
        builder.setNumFailures(builder.getNumFailures() + 1);
    }

    // Count consecutive retries; anything other than a retry resets the streak.
    final boolean scheduledAsRetry = scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY;
    builder.setNumSequentialRetries(scheduledAsRetry ? builder.getNumSequentialRetries() + 1 : 0);

    builder.trimTaskFailureEvents(50);
    final SingularityDeployStatistics updatedStatistics = builder.build();
    LOG.trace("Saving new deploy statistics {}", updatedStatistics);
    deployManager.saveDeployStatistics(updatedStatistics);
}
Also used : TaskCleanupType(com.hubspot.singularity.TaskCleanupType) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityDeployStatisticsBuilder(com.hubspot.singularity.SingularityDeployStatisticsBuilder) TaskFailureEvent(com.hubspot.singularity.TaskFailureEvent)

Example 8 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityScheduler method handlePendingRequestsForDeployKey.

/**
 * Processes all pending requests queued for one deploy: drops obsolete ones, holds those that
 * belong to a scheduled request which needs no new instance, and schedules tasks for the rest.
 *
 * <p>Pending requests are handled in ascending timestamp order (the supplied list is sorted in
 * place). The first request that gets through is always scheduled; later ones are scheduled only
 * when they are of type IMMEDIATE or ONEOFF.
 *
 * @param obsoleteRequests           counter incremented for each obsolete request (or once when the whole request is inactive)
 * @param heldForScheduledActiveTask counter incremented for each pending request that is held
 * @param totalNewScheduledTasks     counter increased by the number of tasks scheduled here
 * @param deployKey                  request id + deploy id the pending requests belong to
 * @param pendingRequestsForDeploy   pending requests for that deploy; sorted in place by timestamp
 */
private void handlePendingRequestsForDeployKey(AtomicInteger obsoleteRequests, AtomicInteger heldForScheduledActiveTask, AtomicInteger totalNewScheduledTasks, SingularityDeployKey deployKey, List<SingularityPendingRequest> pendingRequestsForDeploy) {
    final String requestId = deployKey.getRequestId();
    final Optional<SingularityRequestWithState> maybeRequest = requestManager.getRequest(requestId);
    final SingularityDeployStatistics deployStatistics = getDeployStatistics(deployKey.getRequestId(), deployKey.getDeployId());

    // An inactive request invalidates everything queued for it.
    if (!isRequestActive(maybeRequest)) {
        LOG.info("Pending request {} was obsolete (request {})", requestId, SingularityRequestWithState.getRequestState(maybeRequest));
        obsoleteRequests.getAndIncrement();
        pendingRequestsForDeploy.forEach(requestManager::deletePendingRequest);
        return;
    }

    final SingularityRequestWithState request = maybeRequest.get();
    final Optional<SingularityRequestDeployState> requestDeployState = deployManager.getRequestDeployState(requestId);
    final Optional<SingularityPendingDeploy> pendingDeploy = deployManager.getPendingDeploy(requestId);
    final List<SingularityTaskId> matchingTaskIds = getMatchingTaskIds(request.getRequest(), deployKey);
    final List<SingularityPendingRequest> honoredRequests = new ArrayList<>();

    // Smaller timestamps come first in the queue.
    pendingRequestsForDeploy.sort(Comparator.comparingLong(SingularityPendingRequest::getTimestamp));

    int scheduledTasks = 0;
    for (SingularityPendingRequest pendingRequest : pendingRequestsForDeploy) {
        final SingularityRequest effectiveRequest = updatedRequest(pendingDeploy, pendingRequest, request);
        if (!shouldScheduleTasks(effectiveRequest, pendingRequest, pendingDeploy, requestDeployState)) {
            LOG.info("Pending request {} was obsolete (request {})", pendingRequest, SingularityRequestWithState.getRequestState(maybeRequest));
            obsoleteRequests.getAndIncrement();
            requestManager.deletePendingRequest(pendingRequest);
            continue;
        }

        final int missingInstances = getNumMissingInstances(matchingTaskIds, effectiveRequest, pendingRequest, pendingDeploy);
        final PendingType pendingType = pendingRequest.getPendingType();

        // A scheduled request with nothing missing stays in the queue instead of being launched.
        final boolean scheduledAndFullyStaffed = missingInstances == 0 && effectiveRequest.isScheduled();
        final boolean scheduledRequestWithActiveTask = scheduledAndFullyStaffed
            && !matchingTaskIds.isEmpty()
            && pendingType == PendingType.NEW_DEPLOY;
        final boolean scheduledRequestWithOutdatedActiveTask = scheduledAndFullyStaffed
            && matchingTaskIds.isEmpty()
            && (pendingType == PendingType.NEW_DEPLOY || pendingType == PendingType.STARTUP);
        if (scheduledRequestWithActiveTask || scheduledRequestWithOutdatedActiveTask) {
            LOG.trace("Holding pending request {} because it is scheduled and has an active task", pendingRequest);
            heldForScheduledActiveTask.getAndIncrement();
            continue;
        }

        // The first surviving request is always honored; afterwards only IMMEDIATE and ONEOFF are.
        final boolean honored = honoredRequests.isEmpty()
            || pendingType == PendingType.IMMEDIATE
            || pendingType == PendingType.ONEOFF;
        if (honored) {
            honoredRequests.add(pendingRequest);
            final RequestState requestState = checkCooldown(request.getState(), request.getRequest(), deployStatistics);
            scheduledTasks += scheduleTasks(request.getRequest(), requestState, pendingRequest, matchingTaskIds, pendingDeploy);
            requestManager.deletePendingRequest(pendingRequest);
        } else if (effectiveRequest.isScheduled() && (pendingType == PendingType.NEW_DEPLOY || pendingType == PendingType.TASK_DONE)) {
            // An immediate run of the scheduled task has already been launched in this pass.
            // Drop anything that would leave a second instance of the request in the pending queue.
            requestManager.deletePendingRequest(pendingRequest);
        }
        // Any other subsequent request stays queued and is not honored until the pending queue is cleared.
    }
    totalNewScheduledTasks.getAndAdd(scheduledTasks);
}
Also used : SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) ArrayList(java.util.ArrayList) SingularityRequest(com.hubspot.singularity.SingularityRequest) SingularityRequestDeployState(com.hubspot.singularity.SingularityRequestDeployState) RequestState(com.hubspot.singularity.RequestState) SingularityPendingDeploy(com.hubspot.singularity.SingularityPendingDeploy) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityTaskId(com.hubspot.singularity.SingularityTaskId)

Example 9 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityJobPoller method getExpectedRuntime.

/**
 * Determines how long a task of the given request is expected to run.
 *
 * <p>Sources are tried in this order:
 * <ol>
 *   <li>the expected runtime set on the request itself;</li>
 *   <li>the average runtime recorded in the deploy statistics for the task's deploy;</li>
 *   <li>the time between the task's start and the next run time derived from the request's
 *       schedule (RFC5545 or cron expression).</li>
 * </ol>
 *
 * @param request request the task belongs to
 * @param taskId  id of the task; supplies request id, deploy id and start time
 * @return the expected runtime in milliseconds, or empty when the schedule cannot be parsed or
 *         yields no next run date (both cases are logged and reported to the exception notifier)
 */
private Optional<Long> getExpectedRuntime(SingularityRequest request, SingularityTaskId taskId) {
    // 1) Explicit value on the request wins.
    if (request.getScheduledExpectedRuntimeMillis().isPresent()) {
        return request.getScheduledExpectedRuntimeMillis();
    }

    // 2) Fall back to the observed average for this deploy.
    final Optional<SingularityDeployStatistics> statistics = deployManager.getDeployStatistics(taskId.getRequestId(), taskId.getDeployId());
    if (statistics.isPresent()) {
        final Optional<Long> averageRuntime = statistics.get().getAverageRuntimeMillis();
        if (averageRuntime.isPresent()) {
            return averageRuntime;
        }
    }

    // 3) Derive it from the schedule: time from task start until the next run.
    final boolean usesRfc5545 = request.getScheduleTypeSafe() == ScheduleType.RFC5545;
    final String scheduleExpression = usesRfc5545 ? request.getSchedule().get() : request.getQuartzScheduleSafe();
    Date nextRunAtDate;
    try {
        if (usesRfc5545) {
            nextRunAtDate = new RFC5545Schedule(scheduleExpression).getNextValidTime();
        } else {
            final CronExpression cron = new CronExpression(scheduleExpression);
            nextRunAtDate = cron.getNextValidTimeAfter(new Date(taskId.getStartedAt()));
        }
        if (nextRunAtDate == null) {
            final String message = String.format("No next run date found for %s (%s)", taskId, scheduleExpression);
            LOG.warn(message);
            exceptionNotifier.notify(message, ImmutableMap.of("taskId", taskId.toString()));
            return Optional.empty();
        }
    } catch (ParseException | InvalidRecurrenceRuleException e) {
        LOG.warn("Unable to parse schedule of type {} for expression {} (taskId: {}, err: {})", request.getScheduleTypeSafe(), scheduleExpression, taskId, e);
        exceptionNotifier.notify(String.format("Unable to parse schedule (%s)", e.getMessage()), e, ImmutableMap.of("taskId", taskId.toString(), "scheduleExpression", scheduleExpression, "scheduleType", request.getScheduleTypeSafe().toString()));
        return Optional.empty();
    }
    return Optional.of(nextRunAtDate.getTime() - taskId.getStartedAt());
}
Also used : RFC5545Schedule(com.hubspot.singularity.helpers.RFC5545Schedule) CronExpression(org.quartz.CronExpression) ParseException(java.text.ParseException) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Date(java.util.Date) InvalidRecurrenceRuleException(org.dmfs.rfc5545.recur.InvalidRecurrenceRuleException)

Example 10 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCrashLoopTest method itDetectsSlowConsistentFailureLoops.

/**
 * Records one BAD_EXIT_CODE failure for instance 1 every 30 minutes over the last 390 minutes,
 * with a single gap at 240 minutes, and expects exactly one active crash loop of type
 * SLOW_FAILURES to be reported.
 */
@Test
public void itDetectsSlowConsistentFailureLoops() {
    initRequestWithType(RequestType.WORKER, false);
    initFirstDeploy();
    final long now = System.currentTimeMillis();

    for (int minutesAgo = 30; minutesAgo <= 390; minutesAgo += 30) {
        if (minutesAgo == 240) {
            // skip one
            continue;
        }
        createTaskFailure(1, now - TimeUnit.MINUTES.toMillis(minutesAgo), TaskFailureType.BAD_EXIT_CODE);
    }

    final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
    final List<CrashLoopInfo> detected = crashLoops.getActiveCrashLoops(statistics);

    Assertions.assertEquals(1, detected.size());
    Assertions.assertEquals(CrashLoopType.SLOW_FAILURES, Iterables.getOnlyElement(detected).getType());
}
Also used : CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Test(org.junit.jupiter.api.Test)

Aggregations

SingularityDeployStatistics (com.hubspot.singularity.SingularityDeployStatistics)26 Test (org.junit.jupiter.api.Test)14 CrashLoopInfo (com.hubspot.singularity.CrashLoopInfo)10 SingularityTask (com.hubspot.singularity.SingularityTask)9 SingularityRequest (com.hubspot.singularity.SingularityRequest)7 SingularityRunNowRequestBuilder (com.hubspot.singularity.SingularityRunNowRequestBuilder)5 Inject (com.google.inject.Inject)4 SingularityDeployKey (com.hubspot.singularity.SingularityDeployKey)4 SingularityRequestDeployState (com.hubspot.singularity.SingularityRequestDeployState)4 SingularityTaskId (com.hubspot.singularity.SingularityTaskId)4 List (java.util.List)4 Optional (java.util.Optional)4 SingularityRequestWithState (com.hubspot.singularity.SingularityRequestWithState)3 SingularityTaskCleanup (com.hubspot.singularity.SingularityTaskCleanup)3 SingularityTaskHistoryUpdate (com.hubspot.singularity.SingularityTaskHistoryUpdate)3 SingularityConfiguration (com.hubspot.singularity.config.SingularityConfiguration)3 DeployManager (com.hubspot.singularity.data.DeployManager)3 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 TimeUnit (java.util.concurrent.TimeUnit)3