use of com.hubspot.singularity.CrashLoopInfo in project Singularity by HubSpot.
the class SingularityCrashLoopTest method itDetectsFastFailureLoopsForNonLongRunning.
@Test
public void itDetectsFastFailureLoopsForNonLongRunning() {
  // ON_DEMAND is the request type exercised as the "non long running" case named by this test.
  initRequestWithType(RequestType.ON_DEMAND, false);
  initFirstDeploy();

  final long currentTime = System.currentTimeMillis();

  // Record five BAD_EXIT_CODE failures for instance 1, from 1 second to 45 seconds old.
  final long[] failureAgesMillis = {1000, 10000, 20000, 30000, 45000};
  for (long ageMillis : failureAgesMillis) {
    createTaskFailure(1, currentTime - ageMillis, TaskFailureType.BAD_EXIT_CODE);
  }

  List<CrashLoopInfo> detected = crashLoops.getActiveCrashLoops(
    deployManager.getDeployStatistics(requestId, firstDeployId).get()
  );

  // Exactly one loop is expected, and it must be the fast-failure kind.
  Assertions.assertEquals(1, detected.size());
  CrashLoopInfo onlyLoop = Iterables.getOnlyElement(detected);
  Assertions.assertEquals(CrashLoopType.FAST_FAILURE_LOOP, onlyLoop.getType());
}
use of com.hubspot.singularity.CrashLoopInfo in project Singularity by HubSpot.
the class SingularityCrashLoopTest method itDoesNotTriggerWhenFailuresAreNotRecentEnough.
@Test
public void itDoesNotTriggerWhenFailuresAreNotRecentEnough() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long currentTime = System.currentTimeMillis();

  // 3 failures meets threshold, but latest must be < ~8mins ago for single instance fail loop
  final long[] failureAgesMinutes = {10, 15, 20};
  for (long ageMinutes : failureAgesMinutes) {
    long failedAt = currentTime - TimeUnit.MINUTES.toMillis(ageMinutes);
    createTaskFailure(1, failedAt, TaskFailureType.BAD_EXIT_CODE);
  }

  List<CrashLoopInfo> detected = crashLoops.getActiveCrashLoops(
    deployManager.getDeployStatistics(requestId, firstDeployId).get()
  );

  // The newest failure is 10 minutes old, so no crash loop should be reported.
  Assertions.assertTrue(detected.isEmpty());
}
use of com.hubspot.singularity.CrashLoopInfo in project Singularity by HubSpot.
the class SingularityCrashLoopTest method itDetectsTooManyMultiInstanceFailures.
@Test
public void itDetectsTooManyMultiInstanceFailures() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long currentTime = System.currentTimeMillis();
  final long oneMinute = TimeUnit.MINUTES.toMillis(1);

  // Seven failures of mixed types across instances 1-6, the oldest 16 minutes ago.
  createTaskFailure(1, currentTime - oneMinute, TaskFailureType.BAD_EXIT_CODE);
  createTaskFailure(2, currentTime - 4 * oneMinute, TaskFailureType.OOM);
  createTaskFailure(6, currentTime - 5 * oneMinute, TaskFailureType.OUT_OF_DISK_SPACE);
  createTaskFailure(3, currentTime - 7 * oneMinute, TaskFailureType.OUT_OF_DISK_SPACE);
  createTaskFailure(4, currentTime - 10 * oneMinute, TaskFailureType.OOM);
  createTaskFailure(1, currentTime - 12 * oneMinute, TaskFailureType.OUT_OF_DISK_SPACE);
  createTaskFailure(5, currentTime - 16 * oneMinute, TaskFailureType.BAD_EXIT_CODE);

  List<CrashLoopInfo> detected = crashLoops.getActiveCrashLoops(
    deployManager.getDeployStatistics(requestId, firstDeployId).get()
  );

  // Exactly one loop is expected, classified as a multi-instance failure.
  Assertions.assertEquals(1, detected.size());
  CrashLoopInfo onlyLoop = Iterables.getOnlyElement(detected);
  Assertions.assertEquals(CrashLoopType.MULTI_INSTANCE_FAILURE, onlyLoop.getType());
}
use of com.hubspot.singularity.CrashLoopInfo in project Singularity by HubSpot.
the class SingularityCrashLoops method getUnexpectedExitLoop.
/*
 * Unexpected exits: reports an UNEXPECTED_EXITS crash loop when more than 4 task failure
 * events of type UNEXPECTED_EXIT have a timestamp strictly newer than (now - 30 minutes).
 * The reported loop starts at the earliest of those timestamps and has no end time.
 * Returns Optional.empty() when the count does not exceed the threshold.
 */
private Optional<CrashLoopInfo> getUnexpectedExitLoop(long now, SingularityDeployStatistics deployStatistics) {
  // TODO - configurable?
  final long windowStart = now - TimeUnit.MINUTES.toMillis(30);

  // Single pass: count qualifying events and remember the oldest one seen.
  int recentExitCount = 0;
  long earliestExit = Long.MAX_VALUE;
  for (TaskFailureEvent event : deployStatistics.getTaskFailureEvents()) {
    boolean isRecentUnexpectedExit =
      event.getType() == TaskFailureType.UNEXPECTED_EXIT && event.getTimestamp() > windowStart;
    if (!isRecentUnexpectedExit) {
      continue;
    }
    recentExitCount++;
    earliestExit = Math.min(earliestExit, event.getTimestamp());
  }

  // TODO - configurable?
  if (recentExitCount <= 4) {
    return Optional.empty();
  }

  return Optional.of(
    new CrashLoopInfo(
      deployStatistics.getRequestId(),
      deployStatistics.getDeployId(),
      earliestExit,
      Optional.empty(),
      CrashLoopType.UNEXPECTED_EXITS
    )
  );
}
use of com.hubspot.singularity.CrashLoopInfo in project Singularity by HubSpot.
the class SingularityCrashLoopTest method itDetectsStartupFailureLoops.
@Test
public void itDetectsStartupFailureLoops() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long currentTime = System.currentTimeMillis();

  // Start instance 1 and register an UNHEALTHY_NEW_TASK cleanup for it, timestamped 30 seconds ago.
  SingularityTask startedTask = startTask(firstDeploy, 1);
  SingularityTaskCleanup unhealthyCleanup = new SingularityTaskCleanup(
    Optional.empty(),
    TaskCleanupType.UNHEALTHY_NEW_TASK,
    currentTime - 30000,
    startedTask.getTaskId(),
    Optional.empty(),
    Optional.empty(),
    Optional.empty()
  );
  taskManager.createTaskCleanup(unhealthyCleanup);

  // Three STARTUP_FAILURE events for instance 1 within the last 20 seconds.
  final long[] failureAgesMillis = {10000, 15000, 20000};
  for (long ageMillis : failureAgesMillis) {
    createTaskFailure(1, currentTime - ageMillis, TaskFailureType.STARTUP_FAILURE);
  }

  List<CrashLoopInfo> detected = crashLoops.getActiveCrashLoops(
    deployManager.getDeployStatistics(requestId, firstDeployId).get()
  );

  // Exactly one loop is expected, classified as a startup failure loop.
  Assertions.assertEquals(1, detected.size());
  CrashLoopInfo onlyLoop = Iterables.getOnlyElement(detected);
  Assertions.assertEquals(CrashLoopType.STARTUP_FAILURE_LOOP, onlyLoop.getType());
}
Aggregations