use of com.hubspot.singularity.SingularityDisasterDataPoint in project Singularity by HubSpot.
the class SingularityDisasterDetectionPoller method tooManyLostTasks.
/**
 * Decides whether the portion of recently lost tasks exceeds the configured critical portion.
 *
 * <p>Lost tasks are summed over every data point whose age ({@code now - timestamp}) is below
 * {@code disasterConfiguration.getIncludeLostTasksInLastMillis()}. The sum is divided by the
 * active task count of the first data point in the list, floored at 1 so the division never
 * hits zero.
 *
 * @param now reference time the data point timestamps are compared against (same clock and
 *     unit as {@code SingularityDisasterDataPoint#getTimestamp()})
 * @param dataPoints collected stats; the element at index 0 supplies the active task count
 * @return {@code true} if the lost-task portion is strictly greater than
 *     {@code disasterConfiguration.getCriticalLostTaskPortion()}; {@code false} if
 *     {@code dataPoints} is empty
 */
private boolean tooManyLostTasks(long now, List<SingularityDisasterDataPoint> dataPoints) {
  // With no data points there is nothing to measure against, and get(0) below would throw.
  if (dataPoints.isEmpty()) {
    return false;
  }

  long includeWindowMillis = disasterConfiguration.getIncludeLostTasksInLastMillis();
  int totalLostTasks = 0;
  for (SingularityDisasterDataPoint dataPoint : dataPoints) {
    if (now - dataPoint.getTimestamp() < includeWindowMillis) {
      totalLostTasks += dataPoint.getNumLostTasks();
    }
  }

  // Floor the denominator at 1 so a cluster with no active tasks does not divide by zero.
  double activeTasks = Math.max(dataPoints.get(0).getNumActiveTasks(), 1);
  double lostTasksPortion = totalLostTasks / activeTasks;
  return lostTasksPortion > disasterConfiguration.getCriticalLostTaskPortion();
}
use of com.hubspot.singularity.SingularityDisasterDataPoint in project Singularity by HubSpot.
the class SingularityDisasterDetectionPoller method tooManyLostSlaves.
/**
 * Decides whether the portion of recently lost slaves exceeds the configured critical portion.
 *
 * <p>Lost slaves are summed over every data point whose age ({@code now - timestamp}) is below
 * {@code disasterConfiguration.getIncludeLostSlavesInLastMillis()}. The sum is divided by the
 * active plus lost slave count of the first data point in the list, floored at 1 so the
 * division never hits zero.
 *
 * @param now reference time the data point timestamps are compared against (same clock and
 *     unit as {@code SingularityDisasterDataPoint#getTimestamp()})
 * @param dataPoints collected stats; the element at index 0 supplies the slave totals
 * @return {@code true} if the lost-slave portion is strictly greater than
 *     {@code disasterConfiguration.getCriticalLostSlavePortion()}; {@code false} if
 *     {@code dataPoints} is empty
 */
private boolean tooManyLostSlaves(long now, List<SingularityDisasterDataPoint> dataPoints) {
  // With no data points there is nothing to measure against, and get(0) below would throw.
  if (dataPoints.isEmpty()) {
    return false;
  }

  long includeWindowMillis = disasterConfiguration.getIncludeLostSlavesInLastMillis();
  int totalLostSlaves = 0;
  for (SingularityDisasterDataPoint dataPoint : dataPoints) {
    if (now - dataPoint.getTimestamp() < includeWindowMillis) {
      totalLostSlaves += dataPoint.getNumLostSlaves();
    }
  }

  // The baseline is the slaves the first data point reports as active plus the ones it reports
  // as lost, floored at 1 to avoid dividing by zero.
  SingularityDisasterDataPoint firstPoint = dataPoints.get(0);
  double knownSlaves = Math.max(firstPoint.getNumActiveSlaves() + firstPoint.getNumLostSlaves(), 1);
  double lostSlavesPortion = totalLostSlaves / knownSlaves;
  return lostSlavesPortion > disasterConfiguration.getCriticalLostSlavePortion();
}
use of com.hubspot.singularity.SingularityDisasterDataPoint in project Singularity by HubSpot.
the class SingularityDisasterDetectionPoller method runActionOnPoll.
/**
 * Runs one disaster detection pass.
 *
 * <p>Steps, in order: clear expired disabled actions, prepend a freshly collected data point to
 * the stored history, trim the history to the configured size, evaluate the history for active
 * disasters, then persist both the active disasters and the history. When disasters are active,
 * disabled actions are added for them (unless automated disabled actions are turned off) and an
 * email is queued if at least one of them was not already active before this pass. When none
 * are active, system generated disabled actions are cleared.
 */
@Override
public void runActionOnPoll() {
  LOG.trace("Starting disaster detection");
  clearExpiredDisabledActions();

  List<SingularityDisasterType> previouslyActiveDisasters = disasterManager.getActiveDisasters();
  List<SingularityDisasterDataPoint> dataPoints = disasterManager.getDisasterStats().getDataPoints();

  // Newest data point goes first; the oldest entries live at the tail of the list.
  SingularityDisasterDataPoint newStats = collectDisasterStats();
  dataPoints.add(0, newStats);

  // Trim in a loop rather than removing a single entry: if the stored history is already longer
  // than the configured size (e.g. the limit was lowered), one removal per poll would only
  // offset the point just added and the history would never shrink to the limit.
  while (!dataPoints.isEmpty() && dataPoints.size() > disasterConfiguration.getStatsHistorySize()) {
    dataPoints.remove(dataPoints.size() - 1);
  }

  LOG.debug("Collected new disaster detection dataPoints: {}", newStats);

  List<SingularityDisasterType> newActiveDisasters = checkDataPoints(dataPoints);
  if (!newActiveDisasters.isEmpty()) {
    LOG.warn("Detected new active disasters: {}", newActiveDisasters);
  }

  disasterManager.updateActiveDisasters(previouslyActiveDisasters, newActiveDisasters);
  disasterManager.saveDisasterStats(new SingularityDisasterDataPoints(dataPoints));

  if (!newActiveDisasters.isEmpty()) {
    if (!disasterManager.isAutomatedDisabledActionsDisabled()) {
      disasterManager.addDisabledActionsForDisasters(newActiveDisasters);
    }
    // Only queue an email when something not previously active shows up in this pass.
    if (!previouslyActiveDisasters.containsAll(newActiveDisasters)) {
      queueDisasterEmail(dataPoints, newActiveDisasters);
    }
  } else {
    disasterManager.clearSystemGeneratedDisabledActions();
  }
}
use of com.hubspot.singularity.SingularityDisasterDataPoint in project Singularity by HubSpot.
the class SingularityDisasterDetectionPoller method collectDisasterStats.
/**
 * Builds a data point describing the current task and slave situation.
 *
 * <p>The data point carries: the current time, the active task count, the pending task count,
 * the number of pending tasks whose lag exceeds
 * {@code configuration.getDeltaAfterWhichTasksAreLateMillis()}, the average lag over pending
 * tasks that are past due, the number of lost tasks for the configured lost-task reasons, the
 * number of slaves that are neither DEAD nor MISSING_ON_STARTUP, and the lost slave counter.
 *
 * <p>Side effects: {@code activeSlavesLost} is reset to 0 and {@code taskLostReasons} is
 * cleared after being read.
 *
 * @return the newly collected data point
 */
private SingularityDisasterDataPoint collectDisasterStats() {
  final long now = System.currentTimeMillis();
  final int numActiveTasks = taskManager.getNumActiveTasks();

  final List<SingularityPendingTaskId> pendingTasks = taskManager.getPendingTaskIds();
  final int numPendingTasks = pendingTasks.size();

  int lateCount = 0;
  int pastDueCount = 0;
  long lagSumMillis = 0;
  for (SingularityPendingTaskId pending : pendingTasks) {
    final long lagMillis = now - pending.getNextRunAt();
    if (lagMillis <= 0) {
      // Not due yet; contributes nothing to the lag figures.
      continue;
    }
    pastDueCount += 1;
    lagSumMillis += lagMillis;
    if (lagMillis > configuration.getDeltaAfterWhichTasksAreLateMillis()) {
      lateCount += 1;
    }
  }
  // Average over past-due tasks only; the floor of 1 keeps the division safe when none are due.
  final long avgTaskLagMillis = lagSumMillis / Math.max(pastDueCount, 1);

  int runningSlaveCount = 0;
  for (SingularitySlave slave : slaveManager.getObjects()) {
    if (slave.getCurrentState().getState() == MachineState.DEAD) {
      continue;
    }
    if (slave.getCurrentState().getState() == MachineState.MISSING_ON_STARTUP) {
      continue;
    }
    runningSlaveCount += 1;
  }

  // Read the lost slave counter and reset it to 0 in one call.
  final int numLostSlaves = activeSlavesLost.getAndSet(0);

  int lostTaskCount = 0;
  for (Reason reason : disasterConfiguration.getLostTaskReasons()) {
    lostTaskCount += taskLostReasons.count(reason);
  }
  taskLostReasons.clear();

  return new SingularityDisasterDataPoint(
      now,
      numActiveTasks,
      numPendingTasks,
      lateCount,
      avgTaskLagMillis,
      lostTaskCount,
      runningSlaveCount,
      numLostSlaves);
}
use of com.hubspot.singularity.SingularityDisasterDataPoint in project Singularity by HubSpot.
the class SingularityDisasterDetectionPoller method tooMuchTaskLag.
/**
 * Decides whether task lag has been over threshold for long enough to count as a disaster.
 *
 * <p>Data points are walked in list order. For each one, four conditions are evaluated: overdue
 * task portion above the critical portion, overdue task portion above the warning portion,
 * average lag above the critical lag (only counted when the warning portion is also exceeded),
 * and average lag above the warning lag. Every condition that holds records that data point's
 * timestamp; the walk stops at the first data point where none of the four hold.
 *
 * @param now reference time the recorded timestamps are compared against
 * @param dataPoints collected stats to walk through
 * @return {@code true} if either critical condition, or both warning conditions, were last
 *     recorded more than {@code disasterConfiguration.getTriggerAfterMillisOverTaskLagThreshold()}
 *     before {@code now}
 */
private boolean tooMuchTaskLag(long now, List<SingularityDisasterDataPoint> dataPoints) {
  Optional<Long> criticalLagSince = Optional.absent();
  Optional<Long> warningLagSince = Optional.absent();
  Optional<Long> criticalPortionSince = Optional.absent();
  Optional<Long> warningPortionSince = Optional.absent();

  for (SingularityDisasterDataPoint point : dataPoints) {
    // Floor the task total at 1 so an idle cluster does not divide by zero.
    double taskTotal = Math.max((point.getNumActiveTasks() + point.getNumPendingTasks()), 1);
    double latePortion = point.getNumLateTasks() / taskTotal;

    boolean portionCritical = latePortion > disasterConfiguration.getCriticalOverdueTaskPortion();
    boolean portionWarning = latePortion > disasterConfiguration.getWarningOverdueTaskPortion();
    boolean lagCritical = point.getAvgTaskLagMillis() > disasterConfiguration.getCriticalAvgTaskLagMillis() && portionWarning;
    boolean lagWarning = point.getAvgTaskLagMillis() > disasterConfiguration.getWarningAvgTaskLagMillis();

    // Stop at the first data point where nothing is over threshold.
    if (!(portionCritical || portionWarning || lagCritical || lagWarning)) {
      break;
    }

    Optional<Long> seenAt = Optional.of(point.getTimestamp());
    if (portionCritical) {
      criticalPortionSince = seenAt;
    }
    if (portionWarning) {
      warningPortionSince = seenAt;
    }
    if (lagCritical) {
      criticalLagSince = seenAt;
    }
    if (lagWarning) {
      warningLagSince = seenAt;
    }
  }

  long requiredMillis = disasterConfiguration.getTriggerAfterMillisOverTaskLagThreshold();
  boolean criticalLagSustained = criticalLagSince.isPresent() && now - criticalLagSince.get() > requiredMillis;
  boolean criticalPortionSustained = criticalPortionSince.isPresent() && now - criticalPortionSince.get() > requiredMillis;
  boolean warningLagSustained = warningLagSince.isPresent() && now - warningLagSince.get() > requiredMillis;
  boolean warningPortionSustained = warningPortionSince.isPresent() && now - warningPortionSince.get() > requiredMillis;

  // 'true' if either critical condition is met
  if (criticalLagSustained || criticalPortionSustained) {
    return true;
  }
  // 'true' if both warning conditions are met
  return warningLagSustained && warningPortionSustained;
}
Aggregations