Search in sources :

Example 1 with ResourceUsageType

use of com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType in project Singularity by HubSpot.

the class SingularityMesosOfferSchedulerTest method itAccountsForMaxHistoricalTaskUsage.

@Test
public void itAccountsForMaxHistoricalTaskUsage() {
    try {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.PROBABLE_MAX_USAGE);
        initRequest();
        double cpuReserved = 2;
        double memMbReserved = 1000;
        initFirstDeployWithResources(cpuReserved, memMbReserved);
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(1)));
        resourceOffers(3);
        SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0);
        String t1 = taskId.getId();
        // 2 cpus used
        MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 5, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u1));
        usagePoller.runActionOnPoll();
        // 1 cpus used
        MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 11, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 6, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u2));
        usagePoller.runActionOnPoll();
        Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
        longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0.1);
        SingularitySlaveUsage smallUsage = new SingularitySlaveUsage(0.1, 0.1, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), longRunningTasksUsage, 1, System.currentTimeMillis(), 1, 30000, 10, 0, 0, 0, 0, 107374182);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host1", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host2", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host3", smallUsage);
        requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), SingularityUser.DEFAULT_USER);
        Assert.assertEquals(3.0, usageManager.getRequestUtilizations().get(requestId).getCpuUsed(), 0.001);
        Offer host2Offer = createOffer(6, 30000, 107374182, "host2", "host2");
        slaveAndRackManager.checkOffer(host2Offer);
        Offer host3Offer = createOffer(6, 30000, 107374182, "host3", "host3");
        slaveAndRackManager.checkOffer(host3Offer);
        Collection<SingularityOfferHolder> offerHolders = offerScheduler.checkOffers(Arrays.asList(host2Offer, host3Offer));
        Assert.assertEquals(2, offerHolders.size());
        // A single offer should only ever get a single task even though both have room for both tasks here. Adding a task should reduce the score for the next check
        for (SingularityOfferHolder offerHolder : offerHolders) {
            Assert.assertEquals(1, offerHolder.getAcceptedTasks().size());
        }
    } finally {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.SPREAD_TASK_USAGE);
    }
}
Also used : SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) HashMap(java.util.HashMap) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Offer(org.apache.mesos.v1.Protos.Offer) SingularityScaleRequest(com.hubspot.singularity.api.SingularityScaleRequest) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) Test(org.junit.Test)

Example 2 with ResourceUsageType

use of com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType in project Singularity by HubSpot.

the class SingularityUsagePoller method collectSlaveUage.

private void collectSlaveUage(SingularitySlave slave, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable) {
    Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
    longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0);
    Optional<Long> memoryMbTotal = Optional.absent();
    Optional<Double> cpusTotal = Optional.absent();
    Optional<Long> diskMbTotal = Optional.absent();
    long memoryMbReservedOnSlave = 0;
    double cpuReservedOnSlave = 0;
    long diskMbReservedOnSlave = 0;
    long memoryBytesUsedOnSlave = 0;
    double cpusUsedOnSlave = 0;
    long diskMbUsedOnSlave = 0;
    try {
        List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost());
        MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost());
        double systemMemTotalBytes = 0;
        double systemMemFreeBytes = 0;
        double systemLoad1Min = 0;
        double systemLoad5Min = 0;
        double systemLoad15Min = 0;
        double slaveDiskUsed = 0;
        double slaveDiskTotal = 0;
        double systemCpusTotal = 0;
        if (slaveMetricsSnapshot != null) {
            systemMemTotalBytes = slaveMetricsSnapshot.getSystemMemTotalBytes();
            systemMemFreeBytes = slaveMetricsSnapshot.getSystemMemFreeBytes();
            systemLoad1Min = slaveMetricsSnapshot.getSystemLoad1Min();
            systemLoad5Min = slaveMetricsSnapshot.getSystemLoad5Min();
            systemLoad15Min = slaveMetricsSnapshot.getSystemLoad15Min();
            slaveDiskUsed = slaveMetricsSnapshot.getSlaveDiskUsed();
            slaveDiskTotal = slaveMetricsSnapshot.getSlaveDiskTotal();
            systemCpusTotal = slaveMetricsSnapshot.getSystemCpusTotal();
        }
        double systemLoad;
        switch(configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
            case LOAD_1:
                systemLoad = systemLoad1Min;
                break;
            case LOAD_15:
                systemLoad = systemLoad15Min;
                break;
            case LOAD_5:
            default:
                systemLoad = systemLoad5Min;
                break;
        }
        boolean slaveOverloaded = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
        List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();
        for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
            String taskId = taskUsage.getSource();
            SingularityTaskId task;
            try {
                task = SingularityTaskId.valueOf(taskId);
            } catch (InvalidSingularityTaskIdException e) {
                LOG.error("Couldn't get SingularityTaskId for {}", taskUsage);
                continue;
            }
            SingularityTaskUsage latestUsage = getUsage(taskUsage);
            List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(taskId);
            clearOldUsage(taskId);
            usageManager.saveSpecificTaskUsage(taskId, latestUsage);
            Optional<SingularityTask> maybeTask = taskManager.getTask(task);
            Optional<Resources> maybeResources = Optional.absent();
            if (maybeTask.isPresent()) {
                maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().or(maybeTask.get().getTaskRequest().getDeploy().getResources());
                if (maybeResources.isPresent()) {
                    Resources taskResources = maybeResources.get();
                    double memoryMbReservedForTask = taskResources.getMemoryMb();
                    double cpuReservedForTask = taskResources.getCpus();
                    double diskMbReservedForTask = taskResources.getDiskMb();
                    memoryMbReservedOnSlave += memoryMbReservedForTask;
                    cpuReservedOnSlave += cpuReservedForTask;
                    diskMbReservedOnSlave += diskMbReservedForTask;
                    updateRequestUtilization(utilizationPerRequestId, pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
                }
            }
            memoryBytesUsedOnSlave += latestUsage.getMemoryTotalBytes();
            diskMbUsedOnSlave += latestUsage.getDiskTotalBytes();
            SingularityTaskCurrentUsage currentUsage = null;
            if (pastTaskUsages.isEmpty()) {
                Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
                if (maybeStartingUpdate.isPresent()) {
                    long startTimestampSeconds = TimeUnit.MILLISECONDS.toSeconds(maybeStartingUpdate.get().getTimestamp());
                    double usedCpusSinceStart = latestUsage.getCpuSeconds() / (latestUsage.getTimestamp() - startTimestampSeconds);
                    if (isLongRunning(task) || isConsideredLongRunning(task)) {
                        updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    }
                    currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                    cpusUsedOnSlave += usedCpusSinceStart;
                }
            } else {
                SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
                double taskCpusUsed = ((latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / (latestUsage.getTimestamp() - lastUsage.getTimestamp()));
                if (isLongRunning(task) || isConsideredLongRunning(task)) {
                    updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), taskCpusUsed, latestUsage.getDiskTotalBytes());
                }
                currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, taskCpusUsed, latestUsage.getDiskTotalBytes());
                usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                cpusUsedOnSlave += taskCpusUsed;
            }
            if (configuration.isShuffleTasksForOverloadedSlaves() && currentUsage != null && currentUsage.getCpusUsed() > 0) {
                if (isLongRunning(task) && !configuration.getDoNotShuffleRequests().contains(task.getRequestId())) {
                    Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
                    if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
                        LOG.trace("Task {} already being cleaned up to spread cpu usage, skipping", taskId);
                    } else {
                        if (maybeResources.isPresent()) {
                            possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
                        }
                    }
                }
            }
        }
        if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) {
            LOG.debug("Could not find slave or resources for slave {}", slave.getId());
        } else {
            memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue());
            cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue());
            diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get());
        }
        SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(cpusUsedOnSlave, cpuReservedOnSlave, cpusTotal, memoryBytesUsedOnSlave, memoryMbReservedOnSlave, memoryMbTotal, diskMbUsedOnSlave, diskMbReservedOnSlave, diskMbTotal, longRunningTasksUsage, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, slaveDiskUsed, slaveDiskTotal);
        if (slaveOverloaded) {
            overLoadedHosts.put(slaveUsage, possibleTasksToShuffle);
        }
        List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId());
        if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) {
            usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0));
        }
        if (slaveUsage.getMemoryBytesTotal().isPresent() && slaveUsage.getCpusTotal().isPresent()) {
            totalMemBytesUsed.getAndAdd(slaveUsage.getMemoryBytesUsed());
            totalCpuUsed.getAndAdd(slaveUsage.getCpusUsed());
            totalDiskBytesUsed.getAndAdd(slaveUsage.getDiskBytesUsed());
            totalMemBytesAvailable.getAndAdd(slaveUsage.getMemoryBytesTotal().get());
            totalCpuAvailable.getAndAdd(slaveUsage.getCpusTotal().get());
            totalDiskBytesAvailable.getAndAdd(slaveUsage.getDiskBytesTotal().get());
        }
        LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage);
    } catch (Throwable t) {
        String message = String.format("Could not get slave usage for host %s", slave.getHost());
        LOG.error(message, t);
        exceptionNotifier.notify(message, t);
    }
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityTaskUsage(com.hubspot.singularity.SingularityTaskUsage) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) MesosSlaveMetricsSnapshotObject(com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityTask(com.hubspot.singularity.SingularityTask) AtomicLong(java.util.concurrent.atomic.AtomicLong) Resources(com.hubspot.mesos.Resources)

Example 3 with ResourceUsageType

use of com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType in project Singularity by HubSpot.

the class SingularityMesosOfferSchedulerTest method itAccountsForExpectedTaskUsage.

@Test
public void itAccountsForExpectedTaskUsage() {
    initRequest();
    double cpuReserved = 2;
    double memMbReserved = 1000;
    initFirstDeployWithResources(cpuReserved, memMbReserved);
    saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(1)));
    resourceOffers(3);
    SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0);
    String t1 = taskId.getId();
    // 2 cpus used
    MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 5, 1000);
    mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u1));
    usagePoller.runActionOnPoll();
    Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
    longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0.1);
    longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0.1);
    longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0.1);
    SingularitySlaveUsage smallUsage = new SingularitySlaveUsage(0.1, 0.1, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), longRunningTasksUsage, 1, System.currentTimeMillis(), 1, 30000, 10, 0, 0, 0, 0, 107374182);
    usageManager.saveSpecificSlaveUsageAndSetCurrent("host1", smallUsage);
    usageManager.saveSpecificSlaveUsageAndSetCurrent("host2", smallUsage);
    usageManager.saveSpecificSlaveUsageAndSetCurrent("host3", smallUsage);
    requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), SingularityUser.DEFAULT_USER);
    Assert.assertEquals(2.0, usageManager.getRequestUtilizations().get(requestId).getCpuUsed(), 0.001);
    Offer host2Offer = createOffer(6, 30000, 107374182, "host2", "host2");
    slaveAndRackManager.checkOffer(host2Offer);
    Offer host3Offer = createOffer(6, 30000, 107374182, "host3", "host3");
    slaveAndRackManager.checkOffer(host3Offer);
    Collection<SingularityOfferHolder> offerHolders = offerScheduler.checkOffers(Arrays.asList(host2Offer, host3Offer));
    Assert.assertEquals(2, offerHolders.size());
    // A single offer should only ever get a single task even though both have room for both tasks here. Adding a task should reduce the score for the next check
    for (SingularityOfferHolder offerHolder : offerHolders) {
        Assert.assertEquals(1, offerHolder.getAcceptedTasks().size());
    }
}
Also used : SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) HashMap(java.util.HashMap) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Offer(org.apache.mesos.v1.Protos.Offer) SingularityScaleRequest(com.hubspot.singularity.api.SingularityScaleRequest) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) Test(org.junit.Test)

Aggregations

MesosTaskMonitorObject (com.hubspot.mesos.json.MesosTaskMonitorObject)3 SingularitySlaveUsage (com.hubspot.singularity.SingularitySlaveUsage)3 ResourceUsageType (com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType)3 SingularityTaskId (com.hubspot.singularity.SingularityTaskId)3 HashMap (java.util.HashMap)3 SingularityScaleRequest (com.hubspot.singularity.api.SingularityScaleRequest)2 Offer (org.apache.mesos.v1.Protos.Offer)2 Test (org.junit.Test)2 AtomicDouble (com.google.common.util.concurrent.AtomicDouble)1 Resources (com.hubspot.mesos.Resources)1 MesosSlaveMetricsSnapshotObject (com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject)1 InvalidSingularityTaskIdException (com.hubspot.singularity.InvalidSingularityTaskIdException)1 SingularityTask (com.hubspot.singularity.SingularityTask)1 SingularityTaskCurrentUsage (com.hubspot.singularity.SingularityTaskCurrentUsage)1 SingularityTaskHistoryUpdate (com.hubspot.singularity.SingularityTaskHistoryUpdate)1 SingularityTaskUsage (com.hubspot.singularity.SingularityTaskUsage)1 ArrayList (java.util.ArrayList)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1