Search in sources :

Example 1 with SingularityDeployStatisticsBuilder

Use of com.hubspot.singularity.SingularityDeployStatisticsBuilder in the Singularity project by HubSpot.

The updateDeployStatistics method of the SingularityScheduler class.

/**
 * Folds the outcome of a single finished task into the deploy statistics and saves the result
 * through {@code deployManager}.
 *
 * <p>In order, this updates the running averages (runtime for non-failed tasks, scheduling delay
 * when the task is known), the task count, the last finish time/state, the recorded task failure
 * events, the success/failure counters and the sequential retry counter. At most 50 failure
 * events are retained.
 *
 * @param deployStatistics statistics to start from; a builder copy is modified, not this object
 * @param taskId id of the finished task; supplies the start time and instance number
 * @param task the full task, if available; needed for scheduling delay and the long-running check
 * @param timestamp time of the terminal status update, in the same unit as {@code taskId.getStartedAt()}
 * @param state terminal state reported for the task
 * @param scheduleResult how the task was rescheduled, if at all; {@code RETRY} extends the retry streak
 * @param status raw status whose message/reason are used to classify the failure
 */
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult, Protos.TaskStatus status) {
    final SingularityDeployStatisticsBuilder stats = deployStatistics.toBuilder();

    // Running average of task runtime; failed tasks do not contribute a sample.
    if (!state.isFailed()) {
        final long runtimeMillis = timestamp - taskId.getStartedAt();
        final Optional<Long> previousAverageRuntime = stats.getAverageRuntimeMillis();
        final long updatedAverageRuntime = previousAverageRuntime.isPresent()
            ? (previousAverageRuntime.get() * stats.getNumTasks() + runtimeMillis) / (stats.getNumTasks() + 1)
            : runtimeMillis;
        stats.setAverageRuntimeMillis(Optional.of(updatedAverageRuntime));
    }

    // Running average of (actual start - intended run time); requires the full task.
    if (task.isPresent()) {
        final long dueTime = task.get().getTaskRequest().getPendingTask().getPendingTaskId().getNextRunAt();
        final long schedulingDelayMillis = taskId.getStartedAt() - dueTime;
        final Optional<Long> previousAverageDelay = stats.getAverageSchedulingDelayMillis();
        final long updatedAverageDelay = previousAverageDelay.isPresent()
            ? (previousAverageDelay.get() * stats.getNumTasks() + schedulingDelayMillis) / (stats.getNumTasks() + 1)
            : schedulingDelayMillis;
        stats.setAverageSchedulingDelayMillis(Optional.of(updatedAverageDelay));
    }

    // The averages above weight by the old count, so the count is bumped only afterwards.
    stats.setNumTasks(stats.getNumTasks() + 1);

    // Only a finish later than the recorded one may replace the last finish time/state.
    final Optional<Long> previousFinishAt = stats.getLastFinishAt();
    if (!previousFinishAt.isPresent() || timestamp > previousFinishAt.get()) {
        stats.setLastFinishAt(Optional.of(timestamp));
        stats.setLastTaskState(Optional.of(state));
    }

    // A long-running task that reports TASK_FINISHED is recorded as an unexpected exit.
    if (task.isPresent() && task.get().getTaskRequest().getRequest().isLongRunning() && state == ExtendedTaskState.TASK_FINISHED) {
        stats.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.UNEXPECTED_EXIT));
    }

    // A kill caused by an overdue/unhealthy new-task cleanup is recorded as a startup failure.
    if (state == ExtendedTaskState.TASK_KILLED && status.hasMessage()) {
        final Optional<TaskCleanupType> cleanupType = getCleanupType(taskId, status.getMessage());
        if (cleanupType.isPresent()) {
            final TaskCleanupType type = cleanupType.get();
            if (type == TaskCleanupType.OVERDUE_NEW_TASK || type == TaskCleanupType.UNHEALTHY_NEW_TASK) {
                stats.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.STARTUP_FAILURE));
            }
        }
    }

    if (state.isSuccess()) {
        stats.setNumSuccess(stats.getNumSuccess() + 1);
        stats.setNumFailures(0);
    } else {
        final boolean wasCleaning = SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent();
        if (wasCleaning) {
            // Counters and failure events are left untouched for tasks that were already being cleaned.
            LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown/crash loop", taskId, state);
        } else {
            if (state.isFailed()) {
                final TaskFailureType failureType;
                if ((status.hasMessage() && status.getMessage().contains("Memory limit exceeded")) || (status.hasReason() && status.getReason() == Reason.REASON_CONTAINER_LIMITATION_MEMORY)) {
                    failureType = TaskFailureType.OOM;
                } else if (status.hasReason() && status.getReason() == Reason.REASON_CONTAINER_LIMITATION_DISK) {
                    failureType = TaskFailureType.OUT_OF_DISK_SPACE;
                } else {
                    failureType = TaskFailureType.BAD_EXIT_CODE;
                }
                stats.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, failureType));
            }

            if (state == ExtendedTaskState.TASK_LOST && status.hasReason()) {
                if (isMesosError(status.getReason())) {
                    stats.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.MESOS_ERROR));
                } else if (isLostAgent(status.getReason())) {
                    stats.addTaskFailureEvent(new TaskFailureEvent(taskId.getInstanceNo(), timestamp, TaskFailureType.LOST_SLAVE));
                }
            }

            stats.setNumSuccess(0);
            stats.setNumFailures(stats.getNumFailures() + 1);
        }
    }

    // Any non-retry outcome ends the current streak of sequential retries.
    final boolean isRetry = scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY;
    stats.setNumSequentialRetries(isRetry ? stats.getNumSequentialRetries() + 1 : 0);

    stats.trimTaskFailureEvents(50);

    final SingularityDeployStatistics updatedStatistics = stats.build();
    LOG.trace("Saving new deploy statistics {}", updatedStatistics);
    deployManager.saveDeployStatistics(updatedStatistics);
}
Also used : TaskCleanupType(com.hubspot.singularity.TaskCleanupType) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityDeployStatisticsBuilder(com.hubspot.singularity.SingularityDeployStatisticsBuilder) TaskFailureEvent(com.hubspot.singularity.TaskFailureEvent)

Example 2 with SingularityDeployStatisticsBuilder

Use of com.hubspot.singularity.SingularityDeployStatisticsBuilder in the Singularity project by HubSpot.

The updateDeployStatistics method of the SingularityScheduler class.

/**
 * Folds the outcome of a single finished task into the deploy statistics and saves the result
 * through {@code deployManager} exactly once, after every field has been updated.
 *
 * <p>In order, this updates the running averages (runtime for non-failed tasks, scheduling delay
 * when the task is known), the task count, the last finish time/state, the per-instance list of
 * sequential failure timestamps, the success/failure counters and the sequential retry counter.
 *
 * @param deployStatistics statistics to start from; a builder copy is modified, not this object
 * @param taskId id of the finished task; supplies the start time and instance number
 * @param task the full task, if available; needed to compute the scheduling delay
 * @param timestamp time of the terminal status update, in the same unit as {@code taskId.getStartedAt()}
 * @param state terminal state reported for the task
 * @param scheduleResult how the task was rescheduled, if at all; {@code RETRY} extends the retry streak
 */
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) {
    SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder();
    // Running average of task runtime; failed tasks do not contribute a sample.
    if (!state.isFailed()) {
        if (bldr.getAverageRuntimeMillis().isPresent()) {
            long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1);
            bldr.setAverageRuntimeMillis(Optional.of(newAvgRuntimeMillis));
        } else {
            bldr.setAverageRuntimeMillis(Optional.of(timestamp - taskId.getStartedAt()));
        }
    }
    // Running average of (actual start - intended run time); requires the full task.
    if (task.isPresent()) {
        long dueTime = task.get().getTaskRequest().getPendingTask().getPendingTaskId().getNextRunAt();
        long startedAt = taskId.getStartedAt();
        if (bldr.getAverageSchedulingDelayMillis().isPresent()) {
            long newAverageSchedulingDelayMillis = (bldr.getAverageSchedulingDelayMillis().get() * bldr.getNumTasks() + (startedAt - dueTime)) / (bldr.getNumTasks() + 1);
            bldr.setAverageSchedulingDelayMillis(Optional.of(newAverageSchedulingDelayMillis));
        } else {
            bldr.setAverageSchedulingDelayMillis(Optional.of(startedAt - dueTime));
        }
        // No save here: the statistics are persisted once at the end, so a half-updated
        // snapshot (old task count, old failure/retry counters) is never written.
    }
    // The averages above weight by the old count, so the count is bumped only afterwards.
    bldr.setNumTasks(bldr.getNumTasks() + 1);
    // Only a finish later than the recorded one may replace the last finish time/state.
    if (!bldr.getLastFinishAt().isPresent() || timestamp > bldr.getLastFinishAt().get()) {
        bldr.setLastFinishAt(Optional.of(timestamp));
        bldr.setLastTaskState(Optional.of(state));
    }
    final ListMultimap<Integer, Long> instanceSequentialFailureTimestamps = bldr.getInstanceSequentialFailureTimestamps();
    // The list obtained here is mutated in place below; nothing is written back explicitly.
    final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());
    if (!state.isSuccess()) {
        if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) {
            LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
        } else {
            if (sequentialFailureTimestamps.size() < configuration.getCooldownAfterFailures()) {
                sequentialFailureTimestamps.add(timestamp);
            } else if (!sequentialFailureTimestamps.isEmpty() && timestamp > sequentialFailureTimestamps.get(0)) {
                // List is full: overwrite the oldest entry (index 0, since the list is kept sorted).
                // The isEmpty() guard covers a threshold of 0 or less, where get(0) would throw.
                sequentialFailureTimestamps.set(0, timestamp);
            }
            bldr.setNumFailures(bldr.getNumFailures() + 1);
            // Keep ascending order so index 0 is always the oldest recorded failure.
            Collections.sort(sequentialFailureTimestamps);
        }
    } else {
        bldr.setNumSuccess(bldr.getNumSuccess() + 1);
        // A success ends this instance's streak of sequential failures.
        sequentialFailureTimestamps.clear();
    }
    // Any non-retry outcome ends the current streak of sequential retries.
    if (scheduleResult.isPresent() && scheduleResult.get() == PendingType.RETRY) {
        bldr.setNumSequentialRetries(bldr.getNumSequentialRetries() + 1);
    } else {
        bldr.setNumSequentialRetries(0);
    }
    final SingularityDeployStatistics newStatistics = bldr.build();
    LOG.trace("Saving new deploy statistics {}", newStatistics);
    deployManager.saveDeployStatistics(newStatistics);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityDeployStatisticsBuilder(com.hubspot.singularity.SingularityDeployStatisticsBuilder)

Aggregations

SingularityDeployStatistics (com.hubspot.singularity.SingularityDeployStatistics)2 SingularityDeployStatisticsBuilder (com.hubspot.singularity.SingularityDeployStatisticsBuilder)2 TaskCleanupType (com.hubspot.singularity.TaskCleanupType)1 TaskFailureEvent (com.hubspot.singularity.TaskFailureEvent)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1