Search in sources :

Example 1 with SingularitySlaveUsage

use of com.hubspot.singularity.SingularitySlaveUsage in project Singularity by HubSpot.

the class SingularityUsageTest method itCreatesTaskCleanupsWhenAMachineIsOverloaded.

@Test
public void itCreatesTaskCleanupsWhenAMachineIsOverloaded() {
    try {
        configuration.setShuffleTasksForOverloadedSlaves(true);
        initRequest();
        initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
        resourceOffers(1);
        SingularitySlaveUsage highUsage = new SingularitySlaveUsage(15, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), Collections.emptyMap(), 1, System.currentTimeMillis(), 1, 30000, 10, 15, 15, 15, 0, 107374182);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host1", highUsage);
        SingularityTaskId taskId1 = taskManager.getActiveTaskIds().get(0);
        String t1 = taskId1.getId();
        SingularityTaskId taskId2 = taskManager.getActiveTaskIds().get(1);
        String t2 = taskId2.getId();
        SingularityTaskId taskId3 = taskManager.getActiveTaskIds().get(2);
        String t3 = taskId3.getId();
        statusUpdate(taskManager.getTask(taskId1).get(), TaskState.TASK_STARTING, Optional.of(taskId1.getStartedAt()));
        statusUpdate(taskManager.getTask(taskId2).get(), TaskState.TASK_STARTING, Optional.of(taskId2.getStartedAt()));
        statusUpdate(taskManager.getTask(taskId3).get(), TaskState.TASK_STARTING, Optional.of(taskId3.getStartedAt()));
        // task 1 using 3 cpus
        MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 15, TimeUnit.MILLISECONDS.toSeconds(taskId1.getStartedAt()) + 5, 1024);
        // task 2 using 2 cpus
        MesosTaskMonitorObject t2u1 = getTaskMonitor(t2, 10, TimeUnit.MILLISECONDS.toSeconds(taskId2.getStartedAt()) + 5, 1024);
        // task 3 using 1 cpus
        MesosTaskMonitorObject t3u1 = getTaskMonitor(t3, 5, TimeUnit.MILLISECONDS.toSeconds(taskId3.getStartedAt()) + 5, 1024);
        mesosClient.setSlaveResourceUsage("host1", Arrays.asList(t1u1, t2u1, t3u1));
        mesosClient.setSlaveMetricsSnapshot("host1", new MesosSlaveMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0));
        usagePoller.runActionOnPoll();
        // First task is cleaned up
        Assert.assertEquals(taskManager.getTaskCleanup(taskId1.getId()).get().getCleanupType(), TaskCleanupType.REBALANCE_CPU_USAGE);
        // Second task is not cleaned up because it is from the same request as task 1
        Assert.assertFalse(taskManager.getTaskCleanup(taskId2.getId()).isPresent());
    } finally {
        configuration.setShuffleTasksForOverloadedSlaves(false);
    }
}
Also used : SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) MesosSlaveMetricsSnapshotObject(com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Test(org.junit.Test)

Example 2 with SingularitySlaveUsage

use of com.hubspot.singularity.SingularitySlaveUsage in project Singularity by HubSpot.

the class SingularityMesosOfferSchedulerTest method itAccountsForMaxHistoricalTaskUsage.

@Test
public void itAccountsForMaxHistoricalTaskUsage() {
    try {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.PROBABLE_MAX_USAGE);
        initRequest();
        double cpuReserved = 2;
        double memMbReserved = 1000;
        initFirstDeployWithResources(cpuReserved, memMbReserved);
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(1)));
        resourceOffers(3);
        SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0);
        String t1 = taskId.getId();
        // 2 cpus used
        MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 5, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u1));
        usagePoller.runActionOnPoll();
        // 1 cpus used
        MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 11, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 6, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u2));
        usagePoller.runActionOnPoll();
        Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
        longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0.1);
        SingularitySlaveUsage smallUsage = new SingularitySlaveUsage(0.1, 0.1, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), longRunningTasksUsage, 1, System.currentTimeMillis(), 1, 30000, 10, 0, 0, 0, 0, 107374182);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host1", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host2", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host3", smallUsage);
        requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), SingularityUser.DEFAULT_USER);
        Assert.assertEquals(3.0, usageManager.getRequestUtilizations().get(requestId).getCpuUsed(), 0.001);
        Offer host2Offer = createOffer(6, 30000, 107374182, "host2", "host2");
        slaveAndRackManager.checkOffer(host2Offer);
        Offer host3Offer = createOffer(6, 30000, 107374182, "host3", "host3");
        slaveAndRackManager.checkOffer(host3Offer);
        Collection<SingularityOfferHolder> offerHolders = offerScheduler.checkOffers(Arrays.asList(host2Offer, host3Offer));
        Assert.assertEquals(2, offerHolders.size());
        // A single offer should only ever get a single task even though both have room for both tasks here. Adding a task should reduce the score for the next check
        for (SingularityOfferHolder offerHolder : offerHolders) {
            Assert.assertEquals(1, offerHolder.getAcceptedTasks().size());
        }
    } finally {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.SPREAD_TASK_USAGE);
    }
}
Also used : SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) HashMap(java.util.HashMap) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Offer(org.apache.mesos.v1.Protos.Offer) SingularityScaleRequest(com.hubspot.singularity.api.SingularityScaleRequest) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) Test(org.junit.Test)

Example 3 with SingularitySlaveUsage

use of com.hubspot.singularity.SingularitySlaveUsage in project Singularity by HubSpot.

the class SingularityUsagePoller method shuffleTasksOnOverloadedHosts.

private void shuffleTasksOnOverloadedHosts(Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts) {
    List<SingularityTaskCleanup> shuffleCleanups = taskManager.getCleanupTasks().stream().filter((taskCleanup) -> taskCleanup.getCleanupType() == TaskCleanupType.REBALANCE_CPU_USAGE).collect(Collectors.toList());
    long currentShuffleCleanupsTotal = shuffleCleanups.size();
    Set<String> requestsWithShuffledTasks = shuffleCleanups.stream().map((taskCleanup) -> taskCleanup.getTaskId().getRequestId()).collect(Collectors.toSet());
    List<SingularitySlaveUsage> overLoadedSlavesByUsage = overLoadedHosts.keySet().stream().sorted((usage1, usage2) -> Double.compare(getSystemLoadForShuffle(usage2), getSystemLoadForShuffle(usage1))).collect(Collectors.toList());
    for (SingularitySlaveUsage overloadedSlave : overLoadedSlavesByUsage) {
        if (currentShuffleCleanupsTotal >= configuration.getMaxTasksToShuffleTotal()) {
            LOG.debug("Not shuffling any more tasks (totalShuffleCleanups: {})", currentShuffleCleanupsTotal);
            break;
        }
        int shuffledTasksOnSlave = 0;
        List<TaskIdWithUsage> possibleTasksToShuffle = overLoadedHosts.get(overloadedSlave);
        possibleTasksToShuffle.sort((u1, u2) -> Double.compare(u2.getUsage().getCpusUsed() / u2.getRequestedResources().getCpus(), u1.getUsage().getCpusUsed() / u1.getRequestedResources().getCpus()));
        double systemLoad = getSystemLoadForShuffle(overloadedSlave);
        double cpuOverage = systemLoad - overloadedSlave.getSystemCpusTotal();
        for (TaskIdWithUsage taskIdWithUsage : possibleTasksToShuffle) {
            if (requestsWithShuffledTasks.contains(taskIdWithUsage.getTaskId().getRequestId())) {
                LOG.debug("Request {} already has a shuffling task, skipping", taskIdWithUsage.getTaskId().getRequestId());
                continue;
            }
            if (cpuOverage <= 0 || shuffledTasksOnSlave > configuration.getMaxTasksToShufflePerHost() || currentShuffleCleanupsTotal >= configuration.getMaxTasksToShuffleTotal()) {
                LOG.debug("Not shuffling any more tasks (overage: {}, shuffledOnHost: {}, totalShuffleCleanups: {})", cpuOverage, shuffledTasksOnSlave, currentShuffleCleanupsTotal);
                break;
            }
            LOG.debug("Cleaning up task {} to free up cpu on overloaded host (remaining cpu overage: {})", taskIdWithUsage.getTaskId(), cpuOverage);
            Optional<String> message = Optional.of(String.format("Load on slave is %s / %s, shuffling task to less busy host", systemLoad, overloadedSlave.getSystemCpusTotal()));
            taskManager.createTaskCleanup(new SingularityTaskCleanup(Optional.absent(), TaskCleanupType.REBALANCE_CPU_USAGE, System.currentTimeMillis(), taskIdWithUsage.getTaskId(), message, Optional.of(UUID.randomUUID().toString()), Optional.absent(), Optional.absent()));
            requestManager.addToPendingQueue(new SingularityPendingRequest(taskIdWithUsage.getTaskId().getRequestId(), taskIdWithUsage.getTaskId().getDeployId(), System.currentTimeMillis(), Optional.absent(), PendingType.TASK_BOUNCE, Optional.absent(), Optional.absent(), Optional.absent(), message, Optional.of(UUID.randomUUID().toString())));
            cpuOverage -= taskIdWithUsage.getUsage().getCpusUsed();
            shuffledTasksOnSlave++;
            currentShuffleCleanupsTotal++;
            requestsWithShuffledTasks.add(taskIdWithUsage.getTaskId().getRequestId());
        }
    }
}
Also used : SingularityTask(com.hubspot.singularity.SingularityTask) DeployManager(com.hubspot.singularity.data.DeployManager) ExtendedTaskState(com.hubspot.singularity.ExtendedTaskState) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Inject(com.google.inject.Inject) RequestManager(com.hubspot.singularity.data.RequestManager) LoggerFactory(org.slf4j.LoggerFactory) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) Optional(com.google.common.base.Optional) Map(java.util.Map) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) Resources(com.hubspot.mesos.Resources) TaskManager(com.hubspot.singularity.data.TaskManager) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) CompletableFutures(com.hubspot.singularity.async.CompletableFutures) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) PendingType(com.hubspot.singularity.SingularityPendingRequest.PendingType) List(java.util.List) SingularityClusterUtilization(com.hubspot.singularity.SingularityClusterUtilization) RequestUtilization(com.hubspot.singularity.RequestUtilization) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) MesosSlaveMetricsSnapshotObject(com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) SingularitySlave(com.hubspot.singularity.SingularitySlave) SingularityDeleteResult(com.hubspot.singularity.SingularityDeleteResult) SingularityTaskUsage(com.hubspot.singularity.SingularityTaskUsage) SingularityExceptionNotifier(com.hubspot.singularity.sentry.SingularityExceptionNotifier) ExecutorService(java.util.concurrent.ExecutorService) SingularityConfiguration(com.hubspot.singularity.config.SingularityConfiguration) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) Logger(org.slf4j.Logger) MesosClient(com.hubspot.mesos.client.MesosClient) SingularityDeploy(com.hubspot.singularity.SingularityDeploy) AsyncSemaphore(com.hubspot.singularity.async.AsyncSemaphore) TimeUnit(java.util.concurrent.TimeUnit) UsageManager(com.hubspot.singularity.data.UsageManager) AtomicLong(java.util.concurrent.atomic.AtomicLong) TaskCleanupType(com.hubspot.singularity.TaskCleanupType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup)

Example 4 with SingularitySlaveUsage

use of com.hubspot.singularity.SingularitySlaveUsage in project Singularity by HubSpot.

the class SingularityUsagePoller method runActionOnPoll.

@Override
public void runActionOnPoll() {
    Map<String, RequestUtilization> utilizationPerRequestId = new ConcurrentHashMap<>();
    final long now = System.currentTimeMillis();
    AtomicLong totalMemBytesUsed = new AtomicLong(0);
    AtomicLong totalMemBytesAvailable = new AtomicLong(0);
    AtomicDouble totalCpuUsed = new AtomicDouble(0.00);
    AtomicDouble totalCpuAvailable = new AtomicDouble(0.00);
    AtomicLong totalDiskBytesUsed = new AtomicLong(0);
    AtomicLong totalDiskBytesAvailable = new AtomicLong(0);
    Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts = new ConcurrentHashMap<>();
    List<CompletableFuture<Void>> usageFutures = new ArrayList<>();
    usageHelper.getSlavesToTrackUsageFor().forEach((slave) -> {
        usageFutures.add(usageCollectionSemaphore.call(() -> CompletableFuture.runAsync(() -> {
            collectSlaveUage(slave, now, utilizationPerRequestId, overLoadedHosts, totalMemBytesUsed, totalMemBytesAvailable, totalCpuUsed, totalCpuAvailable, totalDiskBytesUsed, totalDiskBytesAvailable);
        })));
    });
    CompletableFutures.allOf(usageFutures).join();
    usageManager.saveClusterUtilization(getClusterUtilization(utilizationPerRequestId, totalMemBytesUsed.get(), totalMemBytesAvailable.get(), totalCpuUsed.get(), totalCpuAvailable.get(), totalDiskBytesUsed.get(), totalDiskBytesAvailable.get(), now));
    if (configuration.isShuffleTasksForOverloadedSlaves()) {
        shuffleTasksOnOverloadedHosts(overLoadedHosts);
    }
}
Also used : AtomicDouble(com.google.common.util.concurrent.AtomicDouble) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) RequestUtilization(com.hubspot.singularity.RequestUtilization) ArrayList(java.util.ArrayList) AtomicLong(java.util.concurrent.atomic.AtomicLong) CompletableFuture(java.util.concurrent.CompletableFuture) List(java.util.List) ArrayList(java.util.ArrayList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 5 with SingularitySlaveUsage

use of com.hubspot.singularity.SingularitySlaveUsage in project Singularity by HubSpot.

the class SingularityUsagePoller method collectSlaveUage.

private void collectSlaveUage(SingularitySlave slave, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable) {
    Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
    longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0);
    longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0);
    Optional<Long> memoryMbTotal = Optional.absent();
    Optional<Double> cpusTotal = Optional.absent();
    Optional<Long> diskMbTotal = Optional.absent();
    long memoryMbReservedOnSlave = 0;
    double cpuReservedOnSlave = 0;
    long diskMbReservedOnSlave = 0;
    long memoryBytesUsedOnSlave = 0;
    double cpusUsedOnSlave = 0;
    long diskMbUsedOnSlave = 0;
    try {
        List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost());
        MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost());
        double systemMemTotalBytes = 0;
        double systemMemFreeBytes = 0;
        double systemLoad1Min = 0;
        double systemLoad5Min = 0;
        double systemLoad15Min = 0;
        double slaveDiskUsed = 0;
        double slaveDiskTotal = 0;
        double systemCpusTotal = 0;
        if (slaveMetricsSnapshot != null) {
            systemMemTotalBytes = slaveMetricsSnapshot.getSystemMemTotalBytes();
            systemMemFreeBytes = slaveMetricsSnapshot.getSystemMemFreeBytes();
            systemLoad1Min = slaveMetricsSnapshot.getSystemLoad1Min();
            systemLoad5Min = slaveMetricsSnapshot.getSystemLoad5Min();
            systemLoad15Min = slaveMetricsSnapshot.getSystemLoad15Min();
            slaveDiskUsed = slaveMetricsSnapshot.getSlaveDiskUsed();
            slaveDiskTotal = slaveMetricsSnapshot.getSlaveDiskTotal();
            systemCpusTotal = slaveMetricsSnapshot.getSystemCpusTotal();
        }
        double systemLoad;
        switch(configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
            case LOAD_1:
                systemLoad = systemLoad1Min;
                break;
            case LOAD_15:
                systemLoad = systemLoad15Min;
                break;
            case LOAD_5:
            default:
                systemLoad = systemLoad5Min;
                break;
        }
        boolean slaveOverloaded = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
        List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();
        for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
            String taskId = taskUsage.getSource();
            SingularityTaskId task;
            try {
                task = SingularityTaskId.valueOf(taskId);
            } catch (InvalidSingularityTaskIdException e) {
                LOG.error("Couldn't get SingularityTaskId for {}", taskUsage);
                continue;
            }
            SingularityTaskUsage latestUsage = getUsage(taskUsage);
            List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(taskId);
            clearOldUsage(taskId);
            usageManager.saveSpecificTaskUsage(taskId, latestUsage);
            Optional<SingularityTask> maybeTask = taskManager.getTask(task);
            Optional<Resources> maybeResources = Optional.absent();
            if (maybeTask.isPresent()) {
                maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().or(maybeTask.get().getTaskRequest().getDeploy().getResources());
                if (maybeResources.isPresent()) {
                    Resources taskResources = maybeResources.get();
                    double memoryMbReservedForTask = taskResources.getMemoryMb();
                    double cpuReservedForTask = taskResources.getCpus();
                    double diskMbReservedForTask = taskResources.getDiskMb();
                    memoryMbReservedOnSlave += memoryMbReservedForTask;
                    cpuReservedOnSlave += cpuReservedForTask;
                    diskMbReservedOnSlave += diskMbReservedForTask;
                    updateRequestUtilization(utilizationPerRequestId, pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
                }
            }
            memoryBytesUsedOnSlave += latestUsage.getMemoryTotalBytes();
            diskMbUsedOnSlave += latestUsage.getDiskTotalBytes();
            SingularityTaskCurrentUsage currentUsage = null;
            if (pastTaskUsages.isEmpty()) {
                Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
                if (maybeStartingUpdate.isPresent()) {
                    long startTimestampSeconds = TimeUnit.MILLISECONDS.toSeconds(maybeStartingUpdate.get().getTimestamp());
                    double usedCpusSinceStart = latestUsage.getCpuSeconds() / (latestUsage.getTimestamp() - startTimestampSeconds);
                    if (isLongRunning(task) || isConsideredLongRunning(task)) {
                        updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    }
                    currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                    cpusUsedOnSlave += usedCpusSinceStart;
                }
            } else {
                SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
                double taskCpusUsed = ((latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / (latestUsage.getTimestamp() - lastUsage.getTimestamp()));
                if (isLongRunning(task) || isConsideredLongRunning(task)) {
                    updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), taskCpusUsed, latestUsage.getDiskTotalBytes());
                }
                currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, taskCpusUsed, latestUsage.getDiskTotalBytes());
                usageManager.saveCurrentTaskUsage(taskId, currentUsage);
                cpusUsedOnSlave += taskCpusUsed;
            }
            if (configuration.isShuffleTasksForOverloadedSlaves() && currentUsage != null && currentUsage.getCpusUsed() > 0) {
                if (isLongRunning(task) && !configuration.getDoNotShuffleRequests().contains(task.getRequestId())) {
                    Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
                    if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
                        LOG.trace("Task {} already being cleaned up to spread cpu usage, skipping", taskId);
                    } else {
                        if (maybeResources.isPresent()) {
                            possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
                        }
                    }
                }
            }
        }
        if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) {
            LOG.debug("Could not find slave or resources for slave {}", slave.getId());
        } else {
            memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue());
            cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue());
            diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get());
        }
        SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(cpusUsedOnSlave, cpuReservedOnSlave, cpusTotal, memoryBytesUsedOnSlave, memoryMbReservedOnSlave, memoryMbTotal, diskMbUsedOnSlave, diskMbReservedOnSlave, diskMbTotal, longRunningTasksUsage, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, slaveDiskUsed, slaveDiskTotal);
        if (slaveOverloaded) {
            overLoadedHosts.put(slaveUsage, possibleTasksToShuffle);
        }
        List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId());
        if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) {
            usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0));
        }
        if (slaveUsage.getMemoryBytesTotal().isPresent() && slaveUsage.getCpusTotal().isPresent()) {
            totalMemBytesUsed.getAndAdd(slaveUsage.getMemoryBytesUsed());
            totalCpuUsed.getAndAdd(slaveUsage.getCpusUsed());
            totalDiskBytesUsed.getAndAdd(slaveUsage.getDiskBytesUsed());
            totalMemBytesAvailable.getAndAdd(slaveUsage.getMemoryBytesTotal().get());
            totalCpuAvailable.getAndAdd(slaveUsage.getCpusTotal().get());
            totalDiskBytesAvailable.getAndAdd(slaveUsage.getDiskBytesTotal().get());
        }
        LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage);
    } catch (Throwable t) {
        String message = String.format("Could not get slave usage for host %s", slave.getHost());
        LOG.error(message, t);
        exceptionNotifier.notify(message, t);
    }
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityTaskUsage(com.hubspot.singularity.SingularityTaskUsage) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) MesosSlaveMetricsSnapshotObject(com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityTask(com.hubspot.singularity.SingularityTask) AtomicLong(java.util.concurrent.atomic.AtomicLong) Resources(com.hubspot.mesos.Resources)

Aggregations

SingularitySlaveUsage (com.hubspot.singularity.SingularitySlaveUsage)7 MesosTaskMonitorObject (com.hubspot.mesos.json.MesosTaskMonitorObject)6 SingularityTaskId (com.hubspot.singularity.SingularityTaskId)6 MesosSlaveMetricsSnapshotObject (com.hubspot.mesos.json.MesosSlaveMetricsSnapshotObject)4 ResourceUsageType (com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType)4 HashMap (java.util.HashMap)4 Test (org.junit.Test)4 AtomicDouble (com.google.common.util.concurrent.AtomicDouble)3 ArrayList (java.util.ArrayList)3 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3 AtomicLong (java.util.concurrent.atomic.AtomicLong)3 Resources (com.hubspot.mesos.Resources)2 InvalidSingularityTaskIdException (com.hubspot.singularity.InvalidSingularityTaskIdException)2 RequestUtilization (com.hubspot.singularity.RequestUtilization)2 SingularityTask (com.hubspot.singularity.SingularityTask)2 SingularityTaskCurrentUsage (com.hubspot.singularity.SingularityTaskCurrentUsage)2 SingularityTaskHistoryUpdate (com.hubspot.singularity.SingularityTaskHistoryUpdate)2 SingularityTaskUsage (com.hubspot.singularity.SingularityTaskUsage)2 SingularityScaleRequest (com.hubspot.singularity.api.SingularityScaleRequest)2 List (java.util.List)2