Search in sources :

Example 1 with SingularityTaskCurrentUsage

use of com.hubspot.singularity.SingularityTaskCurrentUsage in project Singularity by HubSpot.

the class SingularityUsagePoller method collectSlaveUage.

private void collectSlaveUage(SingularitySlave slave, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable) {
    Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
    longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0);
    Optional<Long> memoryMbTotal = Optional.absent();
    Optional<Double> cpusTotal = Optional.absent();
    Optional<Long> diskMbTotal = Optional.absent();
    long memoryMbReservedOnSlave = 0;
    double cpuReservedOnSlave = 0;
    long diskMbReservedOnSlave = 0;
    long memoryBytesUsedOnSlave = 0;
    double cpusUsedOnSlave = 0;
    long diskMbUsedOnSlave = 0;
    try {
        List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost());
        MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost());
        double systemMemTotalBytes = 0;
        double systemMemFreeBytes = 0;
        double systemLoad1Min = 0;
        double systemLoad5Min = 0;
        double systemLoad15Min = 0;
        double slaveDiskUsed = 0;
        double slaveDiskTotal = 0;
        double systemCpusTotal = 0;
        if (slaveMetricsSnapshot != null) {
            systemMemTotalBytes = slaveMetricsSnapshot.getSystemMemTotalBytes();
            systemMemFreeBytes = slaveMetricsSnapshot.getSystemMemFreeBytes();
            systemLoad1Min = slaveMetricsSnapshot.getSystemLoad1Min();
            systemLoad5Min = slaveMetricsSnapshot.getSystemLoad5Min();
            systemLoad15Min = slaveMetricsSnapshot.getSystemLoad15Min();
            slaveDiskUsed = slaveMetricsSnapshot.getSlaveDiskUsed();
            slaveDiskTotal = slaveMetricsSnapshot.getSlaveDiskTotal();
            systemCpusTotal = slaveMetricsSnapshot.getSystemCpusTotal();
        }
        double systemLoad;
        switch(configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
            case LOAD_1:
                systemLoad = systemLoad1Min;
                break;
            case LOAD_15:
                systemLoad = systemLoad15Min;
                break;
            case LOAD_5:
            default:
                systemLoad = systemLoad5Min;
                break;
        }
        boolean slaveOverloaded = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
        List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();
        for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
            String taskId = taskUsage.getSource();
            SingularityTaskId task;
            try {
                task = SingularityTaskId.valueOf(taskId);
            } catch (InvalidSingularityTaskIdException e) {
                LOG.error("Couldn't get SingularityTaskId for {}", taskUsage);
                continue;
            }
            SingularityTaskUsage latestUsage = getUsage(taskUsage);
            List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(taskId);
            clearOldUsage(taskId);
            usageManager.saveSpecificTaskUsage(taskId, latestUsage);
            Optional<SingularityTask> maybeTask = taskManager.getTask(task);
            Optional<Resources> maybeResources = Optional.absent();
            if (maybeTask.isPresent()) {
                maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().or(maybeTask.get().getTaskRequest().getDeploy().getResources());
                if (maybeResources.isPresent()) {
                    Resources taskResources = maybeResources.get();
                    double memoryMbReservedForTask = taskResources.getMemoryMb();
                    double cpuReservedForTask = taskResources.getCpus();
                    double diskMbReservedForTask = taskResources.getDiskMb();
                    memoryMbReservedOnSlave += memoryMbReservedForTask;
                    cpuReservedOnSlave += cpuReservedForTask;
                    diskMbReservedOnSlave += diskMbReservedForTask;
                    updateRequestUtilization(utilizationPerRequestId, pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
                }
            }
            memoryBytesUsedOnSlave += latestUsage.getMemoryTotalBytes();
            diskMbUsedOnSlave += latestUsage.getDiskTotalBytes();
            SingularityTaskCurrentUsage currentUsage = null;
            if (pastTaskUsages.isEmpty()) {
                Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
                if (maybeStartingUpdate.isPresent()) {
                    long startTimestampSeconds = TimeUnit.MILLISECONDS.toSeconds(maybeStartingUpdate.get().getTimestamp());
                    double usedCpusSinceStart = latestUsage.getCpuSeconds() / (latestUsage.getTimestamp() - startTimestampSeconds);
                    if (isLongRunning(task) || isConsideredLongRunning(task)) {
                        updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    }
                    currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                    cpusUsedOnSlave += usedCpusSinceStart;
                }
            } else {
                SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
                double taskCpusUsed = ((latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / (latestUsage.getTimestamp() - lastUsage.getTimestamp()));
                if (isLongRunning(task) || isConsideredLongRunning(task)) {
                    updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), taskCpusUsed, latestUsage.getDiskTotalBytes());
                }
                currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, taskCpusUsed, latestUsage.getDiskTotalBytes());
                usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                cpusUsedOnSlave += taskCpusUsed;
            }
            if (configuration.isShuffleTasksForOverloadedSlaves() && currentUsage != null && currentUsage.getCpusUsed() > 0) {
                if (isLongRunning(task) && !configuration.getDoNotShuffleRequests().contains(task.getRequestId())) {
                    Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
                    if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
                        LOG.trace("Task {} already being cleaned up to spread cpu usage, skipping", taskId);
                    } else {
                        if (maybeResources.isPresent()) {
                            possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
                        }
                    }
                }
            }
        }
        if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) {
            LOG.debug("Could not find slave or resources for slave {}", slave.getId());
        } else {
            memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue());
            cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue());
            diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get());
        }
        SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(cpusUsedOnSlave, cpuReservedOnSlave, cpusTotal, memoryBytesUsedOnSlave, memoryMbReservedOnSlave, memoryMbTotal, diskMbUsedOnSlave, diskMbReservedOnSlave, diskMbTotal, longRunningTasksUsage, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, slaveDiskUsed, slaveDiskTotal);
        if (slaveOverloaded) {
            overLoadedHosts.put(slaveUsage, possibleTasksToShuffle);
        }
        List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId());
        if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) {
            usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0));
        }
        if (slaveUsage.getMemoryBytesTotal().isPresent() && slaveUsage.getCpusTotal().isPresent()) {
            totalMemBytesUsed.getAndAdd(slaveUsage.getMemoryBytesUsed());
            totalCpuUsed.getAndAdd(slaveUsage.getCpusUsed());
            totalDiskBytesUsed.getAndAdd(slaveUsage.getDiskBytesUsed());
            totalMemBytesAvailable.getAndAdd(slaveUsage.getMemoryBytesTotal().get());
            totalCpuAvailable.getAndAdd(slaveUsage.getCpusTotal().get());
            totalDiskBytesAvailable.getAndAdd(slaveUsage.getDiskBytesTotal().get());
        }
        LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage);
    } catch (Throwable t) {
        String message = String.format("Could not get slave usage for host %s", slave.getHost());
        LOG.error(message, t);
        exceptionNotifier.notify(message, t);
    }
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityTaskUsage(com.hubspot.singularity.SingularityTaskUsage) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) MesosSlaveMetricsSnapshotObject(com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityTask(com.hubspot.singularity.SingularityTask) AtomicLong(java.util.concurrent.atomic.AtomicLong) Resources(com.hubspot.mesos.Resources)

Example 2 with SingularityTaskCurrentUsage

use of com.hubspot.singularity.SingularityTaskCurrentUsage in project Singularity by HubSpot.

the class UsageManager method getTaskCurrentUsages.

public List<SingularityTaskCurrentUsageWithId> getTaskCurrentUsages(List<SingularityTaskId> taskIds) {
    List<String> paths = new ArrayList<>(taskIds.size());
    for (SingularityTaskId taskId : taskIds) {
        paths.add(getCurrentTaskUsagePath(taskId.getId()));
    }
    Map<String, SingularityTaskCurrentUsage> currentTaskUsages = getAsyncWithPath("getTaskCurrentUsages", paths, taskCurrentUsageTranscoder);
    List<SingularityTaskCurrentUsageWithId> currentTaskUsagesWithIds = new ArrayList<>(paths.size());
    for (Entry<String, SingularityTaskCurrentUsage> entry : currentTaskUsages.entrySet()) {
        currentTaskUsagesWithIds.add(new SingularityTaskCurrentUsageWithId(SingularityTaskId.valueOf(getTaskIdFromCurrentUsagePath(entry.getKey())), entry.getValue()));
    }
    return currentTaskUsagesWithIds;
}
Also used : SingularityTaskCurrentUsageWithId(com.hubspot.singularity.SingularityTaskCurrentUsageWithId) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) ArrayList(java.util.ArrayList) SingularityTaskId(com.hubspot.singularity.SingularityTaskId)

Aggregations

SingularityTaskCurrentUsage (com.hubspot.singularity.SingularityTaskCurrentUsage)2 SingularityTaskId (com.hubspot.singularity.SingularityTaskId)2 ArrayList (java.util.ArrayList)2 AtomicDouble (com.google.common.util.concurrent.AtomicDouble)1 Resources (com.hubspot.mesos.Resources)1 MesosSlaveMetricsSnapshotObject (com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject)1 MesosTaskMonitorObject (com.hubspot.mesos.json.MesosTaskMonitorObject)1 InvalidSingularityTaskIdException (com.hubspot.singularity.InvalidSingularityTaskIdException)1 SingularitySlaveUsage (com.hubspot.singularity.SingularitySlaveUsage)1 ResourceUsageType (com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType)1 SingularityTask (com.hubspot.singularity.SingularityTask)1 SingularityTaskCurrentUsageWithId (com.hubspot.singularity.SingularityTaskCurrentUsageWithId)1 SingularityTaskHistoryUpdate (com.hubspot.singularity.SingularityTaskHistoryUpdate)1 SingularityTaskUsage (com.hubspot.singularity.SingularityTaskUsage)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1