Search in sources :

Example 16 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityScheduler method updateDeployStatistics.

/**
 * Folds the outcome of one finished task into the given deploy statistics and saves the result
 * through {@code deployManager}.
 *
 * <p>Fields are updated in this order: running average runtime (skipped when
 * {@code state.isFailed()}), running average scheduling delay (only when {@code task} is present),
 * task count, last finish time/state, per-instance sequential failure timestamps together with the
 * success/failure counters, and finally the sequential retry counter. The statistics are built and
 * saved exactly once, after every field has been updated, so a partially updated snapshot (new
 * averages but a stale task count) is never written.
 *
 * @param deployStatistics statistics to start from; not modified, a builder copy is updated instead
 * @param taskId id of the finished task; supplies the start time and the instance number
 * @param task the finished task, if available; used to read when the task was due to run
 * @param timestamp time of the final status update, treated as the task's finish time
 * @param state final state of the task
 * @param scheduleResult pending type produced when the request was rescheduled, if any; a value of
 *     {@code PendingType.RETRY} increments the sequential retry counter, anything else resets it
 */
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) {
    SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder();
    final long startedAt = taskId.getStartedAt();

    // Both running averages are weighted by the task count *before* this task is added,
    // so they must be computed ahead of the setNumTasks call below.
    if (!state.isFailed()) {
        final long runtimeMillis = timestamp - startedAt;
        if (bldr.getAverageRuntimeMillis().isPresent()) {
            long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + runtimeMillis) / (bldr.getNumTasks() + 1);
            bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis));
        } else {
            bldr.setAverageRuntimeMillis(Optional.of(runtimeMillis));
        }
    }

    if (task.isPresent()) {
        final long dueTime = task.get().getTaskRequest().getPendingTask().getPendingTaskId().getNextRunAt();
        final long schedulingDelayMillis = startedAt - dueTime;
        if (bldr.getAverageSchedulingDelayMillis().isPresent()) {
            long newAverageSchedulingDelayMillis = (bldr.getAverageSchedulingDelayMillis().get() * bldr.getNumTasks() + schedulingDelayMillis) / (bldr.getNumTasks() + 1);
            bldr.setAverageSchedulingDelayMillis(Optional.of(newAverageSchedulingDelayMillis));
        } else {
            bldr.setAverageSchedulingDelayMillis(Optional.of(schedulingDelayMillis));
        }
        // No save here: the single save at the end of the method writes the complete statistics.
    }

    bldr.setNumTasks(bldr.getNumTasks() + 1);

    // Only move the "last finish" marker forward; an older update must not overwrite a newer one.
    if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) {
        bldr.setLastFinishAt(Optional.of(timestamp));
        bldr.setLastTaskState(Optional.of(state));
    }

    // NOTE(review): the list is mutated in place below and never written back, which relies on
    // ListMultimap.get returning a live view of the builder's multimap - confirm for the
    // multimap implementation the builder uses.
    final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps = bldr.getInstanceSequentialFailureTimestamps();
    final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());

    if (!state.isSuccess()) {
        if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) {
            LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
        } else {
            if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) {
                sequentialFailureTimestamps.add(timestamp);
            } else if (!sequentialFailureTimestamps.isEmpty() && timestamp > sequentialFailureTimestamps.get(0)) {
                // The list is full: replace the oldest entry (index 0 after sorting) with this newer
                // failure. The emptiness guard covers a configured limit of zero, where the list
                // never receives an element and get(0) would throw.
                sequentialFailureTimestamps.set(0, timestamp);
            }
            bldr.setNumFailures(bldr.getNumFailures() + 1);
            // Keep the timestamps ascending so index 0 is always the oldest failure.
            Collections.sort(sequentialFailureTimestamps);
        }
    } else {
        bldr.setNumSuccess(bldr.getNumSuccess() + 1);
        // A success ends the run of sequential failures for this instance.
        sequentialFailureTimestamps.clear();
    }

    if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) {
        bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1);
    } else {
        bldr.setNumSequentialRetries(0);
    }

    final SingularityDeployStatistics newStatistics = bldr.build();
    LOG.trace("Saving new deploy statistics {}", newStatistics);
    deployManager.saveDeployStatistics(newStatistics);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityDeployStatisticsBuilder(com.hubspot.singularity.SingularityDeployStatisticsBuilder)

Example 17 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCooldownChecker method shouldExitCooldown.

/**
 * Decides whether the given request should leave cooldown.
 *
 * <p>Returns {@code true} when there is nothing to evaluate the cooldown against (no deploy state,
 * no active deploy, no deploy statistics, or no recorded last finish); otherwise returns whatever
 * {@code cooldown.hasCooldownExpired} reports for the request and its active deploy's statistics.
 *
 * @param cooldownRequest the request currently in cooldown
 * @return {@code true} if the request should exit cooldown
 */
private boolean shouldExitCooldown(SingularityRequestWithState cooldownRequest) {
    final String requestId = cooldownRequest.getRequest().getId();

    final Optional<SingularityRequestDeployState> maybeDeployState = deployManager.getRequestDeployState(requestId);
    final boolean hasActiveDeploy = maybeDeployState.isPresent() && maybeDeployState.get().getActiveDeploy().isPresent();
    if (!hasActiveDeploy) {
        LOG.trace("{} had no deployState / activeDeploy {}, exiting cooldown", requestId, maybeDeployState);
        return true;
    }

    final SingularityRequestDeployState deployState = maybeDeployState.get();
    final Optional<SingularityDeployStatistics> maybeDeployStatistics = deployManager.getDeployStatistics(requestId, deployState.getActiveDeploy().get().getDeployId());
    if (!maybeDeployStatistics.isPresent()) {
        LOG.trace("{} had no deploy statistics, exiting cooldown", new SingularityDeployKey(requestId, deployState.getActiveDeploy().get().getDeployId()));
        return true;
    }

    final SingularityDeployStatistics deployStatistics = maybeDeployStatistics.get();
    if (!deployStatistics.getLastFinishAt().isPresent()) {
        LOG.trace("{} had no last finish, exiting cooldown", new SingularityDeployKey(requestId, deployState.getActiveDeploy().get().getDeployId()));
        return true;
    }

    // No override values are supplied for the two optional arguments.
    return cooldown.hasCooldownExpired(cooldownRequest.getRequest(), deployStatistics, Optional.<Integer>absent(), Optional.<Long>absent());
}
Also used : SingularityDeployKey(com.hubspot.singularity.SingularityDeployKey) SingularityRequestDeployState(com.hubspot.singularity.SingularityRequestDeployState) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics)

Example 18 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCrashLoopTest method itDetectsStartupFailureLoops.

/**
 * Records a cleanup of type UNHEALTHY_NEW_TASK for a started task plus three STARTUP_FAILURE
 * task failures on instance 1 within the last 20 seconds, then expects exactly one active crash
 * loop, of type STARTUP_FAILURE_LOOP.
 */
@Test
public void itDetectsStartupFailureLoops() {
    initRequestWithType(RequestType.WORKER, false);
    initFirstDeploy();

    final long currentTime = System.currentTimeMillis();
    final SingularityTask startedTask = startTask(firstDeploy, 1);
    final SingularityTaskCleanup unhealthyCleanup = new SingularityTaskCleanup(Optional.empty(), TaskCleanupType.UNHEALTHY_NEW_TASK, currentTime - 30000, startedTask.getTaskId(), Optional.empty(), Optional.empty(), Optional.empty());
    taskManager.createTaskCleanup(unhealthyCleanup);

    // Failures are recorded newest first: 10s, 15s and 20s ago.
    for (long millisAgo : new long[] {10000L, 15000L, 20000L}) {
        createTaskFailure(1, currentTime - millisAgo, TaskFailureType.STARTUP_FAILURE);
    }

    final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
    final List<CrashLoopInfo> activeLoops = crashLoops.getActiveCrashLoops(statistics);

    Assertions.assertEquals(1, activeLoops.size());
    Assertions.assertEquals(CrashLoopType.STARTUP_FAILURE_LOOP, Iterables.getOnlyElement(activeLoops).getType());
}
Also used : SingularityTask(com.hubspot.singularity.SingularityTask) CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Test(org.junit.jupiter.api.Test)

Example 19 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCrashLoopTest method itDetectsTooManyOoms.

/**
 * Records four OOM task failures on instance 1 (1, 11, 16 and 20 minutes ago) and expects more
 * than one active crash loop, at least one of which is of type OOM.
 */
@Test
public void itDetectsTooManyOoms() {
    initRequestWithType(RequestType.WORKER, false);
    initFirstDeploy();

    final long currentTime = System.currentTimeMillis();
    // Failures are recorded newest first.
    for (long minutesAgo : new long[] {1L, 11L, 16L, 20L}) {
        createTaskFailure(1, currentTime - TimeUnit.MINUTES.toMillis(minutesAgo), TaskFailureType.OOM);
    }

    final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
    final List<CrashLoopInfo> activeLoops = crashLoops.getActiveCrashLoops(statistics);

    Assertions.assertTrue(activeLoops.size() > 1);
    final boolean hasOomLoop = activeLoops.stream().map(CrashLoopInfo::getType).anyMatch(loopType -> loopType == CrashLoopType.OOM);
    Assertions.assertTrue(hasOomLoop);
}
Also used : TaskStatus(org.apache.mesos.v1.Protos.TaskStatus) CrashLoopType(com.hubspot.singularity.CrashLoopType) SingularityTask(com.hubspot.singularity.SingularityTask) Iterables(com.google.common.collect.Iterables) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) Reason(org.apache.mesos.v1.Protos.TaskStatus.Reason) TaskState(org.apache.mesos.v1.Protos.TaskState) TaskFailureType(com.hubspot.singularity.TaskFailureType) Inject(com.google.inject.Inject) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.jupiter.api.Test) List(java.util.List) TaskCleanupType(com.hubspot.singularity.TaskCleanupType) CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) MesosProtosUtils(com.hubspot.singularity.helpers.MesosProtosUtils) Assertions(org.junit.jupiter.api.Assertions) Optional(java.util.Optional) RequestType(com.hubspot.singularity.RequestType) CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Test(org.junit.jupiter.api.Test)

Example 20 with SingularityDeployStatistics

use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.

the class SingularityCrashLoopTest method itDoesNotTriggerSlowFailureLoopForFailuresConfinedToASmallTimeRange.

@Test
public void itDoesNotTriggerSlowFailureLoopForFailuresConfinedToASmallTimeRange() {
    initRequestWithType(RequestType.WORKER, false);
    initFirstDeploy();
    long now = System.currentTimeMillis();
    // Five minutes ago, should have already cleared
    createTaskFailure(1, now - 1000 - 300000, TaskFailureType.BAD_EXIT_CODE);
    createTaskFailure(1, now - 10000 - 300000, TaskFailureType.BAD_EXIT_CODE);
    createTaskFailure(1, now - 20000 - 300000, TaskFailureType.BAD_EXIT_CODE);
    createTaskFailure(1, now - 30000 - 300000, TaskFailureType.BAD_EXIT_CODE);
    createTaskFailure(1, now - 45000 - 300000, TaskFailureType.BAD_EXIT_CODE);
    SingularityDeployStatistics deployStatistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
    List<CrashLoopInfo> active = crashLoops.getActiveCrashLoops(deployStatistics);
    Assertions.assertEquals(0, active.size());
}
Also used : CrashLoopInfo(com.hubspot.singularity.CrashLoopInfo) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Test(org.junit.jupiter.api.Test)

Aggregations

SingularityDeployStatistics (com.hubspot.singularity.SingularityDeployStatistics): 26; Test (org.junit.jupiter.api.Test): 14; CrashLoopInfo (com.hubspot.singularity.CrashLoopInfo): 10; SingularityTask (com.hubspot.singularity.SingularityTask): 9; SingularityRequest (com.hubspot.singularity.SingularityRequest): 7; SingularityRunNowRequestBuilder (com.hubspot.singularity.SingularityRunNowRequestBuilder): 5; Inject (com.google.inject.Inject): 4; SingularityDeployKey (com.hubspot.singularity.SingularityDeployKey): 4; SingularityRequestDeployState (com.hubspot.singularity.SingularityRequestDeployState): 4; SingularityTaskId (com.hubspot.singularity.SingularityTaskId): 4; List (java.util.List): 4; Optional (java.util.Optional): 4; SingularityRequestWithState (com.hubspot.singularity.SingularityRequestWithState): 3; SingularityTaskCleanup (com.hubspot.singularity.SingularityTaskCleanup): 3; SingularityTaskHistoryUpdate (com.hubspot.singularity.SingularityTaskHistoryUpdate): 3; SingularityConfiguration (com.hubspot.singularity.config.SingularityConfiguration): 3; DeployManager (com.hubspot.singularity.data.DeployManager): 3; ArrayList (java.util.ArrayList): 3; Map (java.util.Map): 3; TimeUnit (java.util.concurrent.TimeUnit): 3