Use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.
Class SingularityScheduler, method updateDeployStatistics.
/**
 * Folds one task result into the given deploy statistics and saves the result once, at the end.
 *
 * <p>Updated in this order: the running average runtime (skipped when {@code state.isFailed()}),
 * the running average scheduling delay (only when {@code task} is present), the task count, the
 * last-finish time/state, the per-instance sequential failure timestamps together with the
 * failure/success counters, and the sequential retry counter.
 *
 * <p>The statistics are persisted a single time after every field has been updated, so a reader
 * never observes a snapshot whose averages already include this task while the task count and
 * counters do not.
 *
 * @param deployStatistics statistics to start from; copied through {@code toBuilder()}
 * @param taskId id of the task; supplies the start time and the instance number
 * @param task the task, if available; only used to read the pending task's next-run time
 * @param timestamp time of this update (presumably epoch millis, like {@code getStartedAt()} — confirm with callers)
 * @param state task state being recorded
 * @param scheduleResult when present and equal to {@code PendingType.RETRY} the sequential retry
 *     counter is incremented; otherwise it is reset to 0
 */
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) {
  SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder();

  // Both running averages are weighted by the task count *before* this task is added, so they
  // must be computed ahead of the setNumTasks call further down.
  if (!state.isFailed()) {
    if (bldr.getAverageRuntimeMillis().isPresent()) {
      long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1);
      bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis));
    } else {
      bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt()));
    }
  }

  if (task.isPresent()) {
    // Scheduling delay = when the task actually started minus when it was due to run.
    long dueTime = task.get().getTaskRequest().getPendingTask().getPendingTaskId().getNextRunAt();
    long startedAt = taskId.getStartedAt();
    if (bldr.getAverageSchedulingDelayMillis().isPresent()) {
      long newAverageSchedulingDelayMillis = (bldr.getAverageSchedulingDelayMillis().get() * bldr.getNumTasks() + (startedAt - dueTime)) / (bldr.getNumTasks() + 1);
      bldr.setAverageSchedulingDelayMillis(Optional.of(newAverageSchedulingDelayMillis));
    } else {
      bldr.setAverageSchedulingDelayMillis(Optional.of(startedAt - dueTime));
    }
    // Intentionally no save here: the single save at the end of the method persists these values
    // together with the remaining updates.
  }

  bldr.setNumTasks(bldr.getNumTasks() + 1);

  // Only move the last-finish marker forward in time.
  if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) {
    bldr.setLastFinishAt(Optional.of(timestamp));
    bldr.setLastTaskState(Optional.of(state));
  }

  // The list obtained from the multimap is mutated in place below. NOTE(review): this relies on
  // ListMultimap.get returning a live view (as Guava's does) so the changes reach the builder.
  final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps = bldr.getInstanceSequentialFailureTimestamps();
  final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());

  if (!state.isSuccess()) {
    if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) {
      LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
    } else {
      // Keep at most getCooldownAfterFailures() timestamps: append while there is room, otherwise
      // overwrite index 0 when this failure is newer. The list is re-sorted after every change
      // made here, so index 0 holds the smallest timestamp recorded by this method.
      if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) {
        sequentialFailureTimestamps.add(timestamp);
      } else if (timestamp > sequentialFailureTimestamps.get(0)) {
        sequentialFailureTimestamps.set(0, timestamp);
      }
      bldr.setNumFailures(bldr.getNumFailures() + 1);
      Collections.sort(sequentialFailureTimestamps);
    }
  } else {
    // A success ends the run of sequential failures for this instance.
    bldr.setNumSuccess(bldr.getNumSuccess() + 1);
    sequentialFailureTimestamps.clear();
  }

  if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) {
    bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1);
  } else {
    bldr.setNumSequentialRetries(0);
  }

  final SingularityDeployStatistics newStatistics = bldr.build();
  LOG.trace("Saving new deploy statistics {}", newStatistics);
  deployManager.saveDeployStatistics(newStatistics);
}
Use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.
Class SingularityCooldownChecker, method shouldExitCooldown.
/**
 * Decides whether the given request may leave cooldown.
 *
 * <p>Returns {@code true} immediately when the request has no deploy state or no active deploy,
 * when the active deploy has no statistics, or when those statistics carry no last-finish time.
 * Otherwise the answer is whatever {@code cooldown.hasCooldownExpired} reports for the request
 * and its active deploy's statistics.
 *
 * @param cooldownRequest the request (with state) being checked
 * @return {@code true} if the request should exit cooldown
 */
private boolean shouldExitCooldown(SingularityRequestWithState cooldownRequest) {
  final String requestId = cooldownRequest.getRequest().getId();
  final Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(requestId);

  final boolean hasActiveDeploy = deployState.isPresent() && deployState.get().getActiveDeploy().isPresent();
  if (!hasActiveDeploy) {
    LOG.trace("{} had no deployState / activeDeploy {}, exiting cooldown", requestId, deployState);
    return true;
  }

  final String deployId = deployState.get().getActiveDeploy().get().getDeployId();
  final Optional<SingularityDeployStatistics> statistics = deployManager.getDeployStatistics(requestId, deployId);
  if (!statistics.isPresent()) {
    LOG.trace("{} had no deploy statistics, exiting cooldown", new SingularityDeployKey(requestId, deployId));
    return true;
  }

  final SingularityDeployStatistics activeStatistics = statistics.get();
  if (!activeStatistics.getLastFinishAt().isPresent()) {
    LOG.trace("{} had no last finish, exiting cooldown", new SingularityDeployKey(requestId, deployId));
    return true;
  }

  // No instance number or timestamp override is supplied; both are passed as absent.
  return cooldown.hasCooldownExpired(cooldownRequest.getRequest(), activeStatistics, Optional.<Integer>absent(), Optional.<Long>absent());
}
Use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.
Class SingularityCrashLoopTest, method itDetectsStartupFailureLoops.
/**
 * Records an UNHEALTHY_NEW_TASK cleanup for a started task plus three STARTUP_FAILURE task
 * failures (offsets of 10000, 15000 and 20000 before "now"), then expects exactly one active
 * crash loop, of type STARTUP_FAILURE_LOOP.
 */
@Test
public void itDetectsStartupFailureLoops() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long now = System.currentTimeMillis();
  final SingularityTask startedTask = startTask(firstDeploy, 1);

  final SingularityTaskCleanup unhealthyCleanup = new SingularityTaskCleanup(
    Optional.empty(),
    TaskCleanupType.UNHEALTHY_NEW_TASK,
    now - 30000,
    startedTask.getTaskId(),
    Optional.empty(),
    Optional.empty(),
    Optional.empty()
  );
  taskManager.createTaskCleanup(unhealthyCleanup);

  // Same order as written out by hand: most recent failure first.
  for (long offsetBeforeNow : new long[] {10000, 15000, 20000}) {
    createTaskFailure(1, now - offsetBeforeNow, TaskFailureType.STARTUP_FAILURE);
  }

  final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
  final List<CrashLoopInfo> activeLoops = crashLoops.getActiveCrashLoops(statistics);

  Assertions.assertEquals(1, activeLoops.size());
  final CrashLoopInfo onlyLoop = Iterables.getOnlyElement(activeLoops);
  Assertions.assertEquals(CrashLoopType.STARTUP_FAILURE_LOOP, onlyLoop.getType());
}
Use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.
Class SingularityCrashLoopTest, method itDetectsTooManyOoms.
/**
 * Records four OOM task failures at 1, 11, 16 and 20 minutes before "now", then expects more
 * than one active crash loop, at least one of which has type OOM.
 */
@Test
public void itDetectsTooManyOoms() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long now = System.currentTimeMillis();

  // Same order as written out by hand: most recent failure first.
  for (long minutesAgo : new long[] {1, 11, 16, 20}) {
    createTaskFailure(1, now - TimeUnit.MINUTES.toMillis(minutesAgo), TaskFailureType.OOM);
  }

  final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
  final List<CrashLoopInfo> activeLoops = crashLoops.getActiveCrashLoops(statistics);

  Assertions.assertTrue(activeLoops.size() > 1);
  final boolean hasOomLoop = activeLoops.stream().anyMatch(loop -> loop.getType() == CrashLoopType.OOM);
  Assertions.assertTrue(hasOomLoop);
}
Use of com.hubspot.singularity.SingularityDeployStatistics in project Singularity by HubSpot.
Class SingularityCrashLoopTest, method itDoesNotTriggerSlowFailureLoopForFailuresConfinedToASmallTimeRange.
/**
 * Records five BAD_EXIT_CODE task failures spread over roughly 45 seconds, all shifted a further
 * 300000 ms into the past, then expects no active crash loops.
 */
@Test
public void itDoesNotTriggerSlowFailureLoopForFailuresConfinedToASmallTimeRange() {
  initRequestWithType(RequestType.WORKER, false);
  initFirstDeploy();

  final long now = System.currentTimeMillis();
  final long fiveMinutesMillis = 300000;

  // Five minutes ago, should have already cleared
  for (long offsetBeforeNow : new long[] {1000, 10000, 20000, 30000, 45000}) {
    createTaskFailure(1, now - offsetBeforeNow - fiveMinutesMillis, TaskFailureType.BAD_EXIT_CODE);
  }

  final SingularityDeployStatistics statistics = deployManager.getDeployStatistics(requestId, firstDeployId).get();
  final List<CrashLoopInfo> activeLoops = crashLoops.getActiveCrashLoops(statistics);

  Assertions.assertEquals(0, activeLoops.size());
}
Aggregations