Search in sources:

Example 6 with SingularityAgentUsage

Use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.

From the class SingularityMesosOfferSchedulerTest, method getUsage.

/**
 * Builds a {@link SingularityAgentUsageWithCalculatedScores} test fixture from reserved, total
 * and in-use figures for memory, cpu and disk.
 *
 * <p>The in-use and total memory/disk amounts handed over as byte quantities are scaled from
 * megabytes with {@code SingularityAgentUsage.BYTES_PER_MEGABYTE}; the reserved amounts and the
 * {@code Optional} totals are passed through exactly as received.
 */
private SingularityAgentUsageWithCalculatedScores getUsage(long memMbReserved, long memMbTotal, long memMbInUse, double cpusReserved, double cpusTotal, double cpuInUse, long diskMbReserved, long diskMbTotal, long diskMbInUse) {
    // Megabyte -> byte conversions, computed once and reused below.
    long memBytesTotal = memMbTotal * SingularityAgentUsage.BYTES_PER_MEGABYTE;
    long memBytesUsed = memMbInUse * SingularityAgentUsage.BYTES_PER_MEGABYTE;
    long memBytesFree = memBytesTotal - memBytesUsed;
    long diskBytesUsed = diskMbInUse * SingularityAgentUsage.BYTES_PER_MEGABYTE;
    long diskBytesTotal = diskMbTotal * SingularityAgentUsage.BYTES_PER_MEGABYTE;

    SingularityAgentUsage agentUsage = new SingularityAgentUsage(
        cpuInUse, cpusReserved, Optional.of(cpusTotal),
        memBytesUsed, memMbReserved, Optional.of(memMbTotal),
        diskBytesUsed, diskMbReserved, Optional.of(diskMbTotal),
        1, 0L,
        memBytesTotal, memBytesFree,
        cpusTotal, cpuInUse, cpuInUse, cpuInUse,
        diskBytesUsed, diskBytesTotal
    );

    return new SingularityAgentUsageWithCalculatedScores(
        agentUsage,
        MachineLoadMetric.LOAD_5,
        new MaxProbableUsage(0, 0, 0),
        0,
        0,
        System.currentTimeMillis()
    );
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MaxProbableUsage(com.hubspot.singularity.mesos.SingularityAgentUsageWithCalculatedScores.MaxProbableUsage)

Example 7 with SingularityAgentUsage

Use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.

From the class SingularityTaskShuffler, method shuffle.

/**
 * Visits the overloaded hosts in the order given by {@code OverusedResource.prioritize} and
 * bounces candidate tasks off each one until the host is no longer over-utilized or a per-host
 * or cluster-wide shuffle limit is reached.
 *
 * <p>At most one task per request is shuffled: requests that already have a shuffling task are
 * skipped.
 *
 * @param overloadedHosts usage of each overloaded host, mapped to the tasks (with usage) on it
 */
public void shuffle(Map<SingularityAgentUsage, List<TaskIdWithUsage>> overloadedHosts) {
    LOG.debug("Beginning task shuffle for {} slaves", overloadedHosts.size());
    if (overloadedHosts.isEmpty()) {
        return;
    }

    List<OverusedAgent> prioritizedAgents = overloadedHosts
        .entrySet()
        .stream()
        .map(hostEntry -> new OverusedAgent(hostEntry.getKey(), hostEntry.getValue(), getMostOverusedResource(hostEntry.getKey())))
        .sorted((left, right) -> OverusedResource.prioritize(left.resource, right.resource))
        .collect(Collectors.toList());

    // Snapshot of what is already being shuffled, so limits account for in-flight cleanups.
    List<SingularityTaskCleanup> inFlightShuffles = getShufflingTasks();
    Set<String> requestsBeingShuffled = getAssociatedRequestIds(inFlightShuffles);
    Map<String, Long> inFlightShufflesByHost = getShufflingTaskCountPerHost(inFlightShuffles);
    long clusterShuffleCount = inFlightShuffles.size();
    LOG.debug("{} tasks currently shuffling on cluster", clusterShuffleCount);

    for (OverusedAgent overused : prioritizedAgents) {
        if (clusterShuffleCount >= configuration.getMaxTasksToShuffleTotal()) {
            LOG.debug("Not shuffling any more tasks (totalShuffleCleanups: {})", clusterShuffleCount);
            break;
        }

        TaskCleanupType cleanupType = overused.resource.toTaskCleanupType();
        List<TaskIdWithUsage> candidates = getPrioritizedShuffleCandidates(overused);
        long hostShuffleCount = inFlightShufflesByHost.getOrDefault(getHostId(overused).orElse(""), 0L);
        long remainingCandidates = candidates.size();
        // Running estimate of the host's load, reduced as tasks are bounced below.
        double hostCpuLoad = getSystemCpuLoadForShuffle(overused.usage);
        double hostMemLoadBytes = getSystemMemLoadForShuffle(overused.usage);

        for (TaskIdWithUsage candidate : candidates) {
            remainingCandidates--;

            String requestId = candidate.getTaskId().getRequestId();
            if (requestsBeingShuffled.contains(requestId)) {
                LOG.debug("Request {} already has a shuffling task, skipping", requestId);
                continue;
            }

            boolean stillOverused = isOverutilized(overused, hostCpuLoad, hostMemLoadBytes);
            boolean atShuffleLimit = isShufflingTooManyTasks(hostShuffleCount, clusterShuffleCount);
            double candidateCpu = candidate.getUsage().getCpusUsed();
            double candidateMemBytes = candidate.getUsage().getMemoryTotalBytes();

            // Done with this host once it is back under the threshold or a limit is hit.
            if (!stillOverused || atShuffleLimit) {
                LOG.debug("Not shuffling any more tasks on slave {} ({} overage : {}%, shuffledOnHost: {}, totalShuffleCleanups: {})", candidate.getTaskId().getSanitizedHost(), overused.resource.resourceType, overused.resource.getOverusageRatio() * 100, hostShuffleCount, clusterShuffleCount);
                break;
            }

            // With exactly one shuffle left on this host and other candidates still to come,
            // pass over a task for which resource.exceeds(...) reports true.
            long remainingHostShuffles = configuration.getMaxTasksToShufflePerHost() - hostShuffleCount;
            boolean lastSlotWithAlternatives = remainingHostShuffles == 1 && remainingCandidates > 0;
            if (lastSlotWithAlternatives && overused.resource.exceeds(candidateCpu, candidateMemBytes)) {
                LOG.debug("Skipping shuffling task {} on slave {} to reach threshold ({} overage : {}%, shuffledOnHost: {}, totalShuffleCleanups: {})", candidate.getTaskId().getId(), candidate.getTaskId().getSanitizedHost(), overused.resource.resourceType, overused.resource.getOverusageRatio() * 100, hostShuffleCount, clusterShuffleCount);
                continue;
            }

            bounce(candidate, cleanupType, Optional.of(getShuffleMessage(overused, candidate, hostCpuLoad, hostMemLoadBytes)));

            // Account for the bounced task in the load estimates and the shuffle counters.
            hostCpuLoad = hostCpuLoad - candidateCpu;
            hostMemLoadBytes = hostMemLoadBytes - candidateMemBytes;
            overused.resource.updateOverusage(candidateCpu, candidateMemBytes);
            hostShuffleCount++;
            clusterShuffleCount++;
            requestsBeingShuffled.add(requestId);
        }
    }

    LOG.debug("Completed task shuffle for {} slaves", overloadedHosts.size());
}
Also used : SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) Logger(org.slf4j.Logger) Inject(com.google.inject.Inject) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) RequestManager(com.hubspot.singularity.data.RequestManager) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) PendingType(com.hubspot.singularity.SingularityPendingRequest.PendingType) SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) List(java.util.List) TaskCleanupType(com.hubspot.singularity.TaskCleanupType) SizeUnit(io.dropwizard.util.SizeUnit) Map(java.util.Map) Type(com.hubspot.singularity.scheduler.SingularityTaskShuffler.OverusedResource.Type) Optional(java.util.Optional) TaskManager(com.hubspot.singularity.data.TaskManager) SingularityConfiguration(com.hubspot.singularity.config.SingularityConfiguration) TaskCleanupType(com.hubspot.singularity.TaskCleanupType) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup)

Example 8 with SingularityAgentUsage

Use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.

From the class SingularityUsagePoller, method runActionOnPoll.

/**
 * Collects usage for every tracked agent asynchronously on {@code usageExecutor}, waits for all
 * collections to finish, saves the cluster-wide and per-request utilization, and finally passes
 * the overloaded hosts to the task shuffler when shuffling is enabled in the configuration and
 * not disabled through the disaster manager.
 */
@Override
public void runActionOnPoll() {
    Map<String, RequestUtilization> requestUtilizations = new ConcurrentHashMap<>();
    Map<String, RequestUtilization> priorUtilizations = usageManager.getRequestUtilizations(false);
    final long pollTimestamp = System.currentTimeMillis();

    // Cluster-wide accumulators, updated concurrently by the per-agent collection tasks.
    AtomicLong memBytesUsed = new AtomicLong(0);
    AtomicLong memBytesAvailable = new AtomicLong(0);
    AtomicDouble cpuUsed = new AtomicDouble(0.00);
    AtomicDouble cpuAvailable = new AtomicDouble(0.00);
    AtomicLong diskBytesUsed = new AtomicLong(0);
    AtomicLong diskBytesAvailable = new AtomicLong(0);
    Map<SingularityAgentUsage, List<TaskIdWithUsage>> overloadedHosts = new ConcurrentHashMap<>();

    List<CompletableFuture<Void>> pendingCollections = new ArrayList<>();
    usageHelper.getAgentsToTrackUsageFor().forEach(trackedAgent ->
        pendingCollections.add(
            CompletableFuture.runAsync(
                () -> usageHelper.collectAgentUsage(trackedAgent, pollTimestamp, requestUtilizations, priorUtilizations, overloadedHosts, memBytesUsed, memBytesAvailable, cpuUsed, cpuAvailable, diskBytesUsed, diskBytesAvailable, false),
                usageExecutor
            )
        )
    );
    // Block until every agent has been processed before reading the accumulators.
    CompletableFutures.allOf(pendingCollections).join();

    usageManager.saveClusterUtilization(
        getClusterUtilization(requestUtilizations, memBytesUsed.get(), memBytesAvailable.get(), cpuUsed.get(), cpuAvailable.get(), diskBytesUsed.get(), diskBytesAvailable.get(), pollTimestamp)
    );
    requestUtilizations.values().forEach(usageManager::saveRequestUtilization);

    if (!configuration.isShuffleTasksForOverloadedAgents() || disasterManager.isDisabled(SingularityAction.TASK_SHUFFLE)) {
        return;
    }
    taskShuffler.shuffle(overloadedHosts);
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) RequestUtilization(com.hubspot.singularity.RequestUtilization) ArrayList(java.util.ArrayList) AtomicLong(java.util.concurrent.atomic.AtomicLong) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 9 with SingularityAgentUsage

Use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.

From the class SingularityUsageTest, method itDelaysTaskShuffles.

/**
 * With a 15 minute eligibility delay configured, only the task whose RUNNING status update is
 * back-dated by 15 minutes is expected to get a CPU rebalance cleanup after a usage poll.
 */
@Test
public void itDelaysTaskShuffles() {
    try {
        configuration.setShuffleTasksForOverloadedAgents(true);
        configuration.setMinutesBeforeNewTaskEligibleForShuffle(15);

        initRequest();
        initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
        resourceOffers(1);

        SingularityAgentUsage overloadedUsage = new SingularityAgentUsage(15, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 200000, 30000, 10, 15, 15, 15, 0, 107374182);
        usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(overloadedUsage, "host1"));

        SingularityTaskId firstTask = taskManager.getActiveTaskIds().get(0);
        SingularityTaskId secondTask = taskManager.getActiveTaskIds().get(1);
        SingularityTaskId thirdTask = taskManager.getActiveTaskIds().get(2);

        statusUpdate(taskManager.getTask(firstTask).get(), TaskState.TASK_STARTING, Optional.of(firstTask.getStartedAt()));
        statusUpdate(taskManager.getTask(secondTask).get(), TaskState.TASK_STARTING, Optional.of(secondTask.getStartedAt()));
        statusUpdate(taskManager.getTask(thirdTask).get(), TaskState.TASK_STARTING, Optional.of(thirdTask.getStartedAt()));
        // Only the second task gets a RUNNING update, back-dated by the 15 minute delay.
        statusUpdate(taskManager.getTask(secondTask).get(), TaskState.TASK_RUNNING, Optional.of(secondTask.getStartedAt() - TimeUnit.MINUTES.toMillis(15)));

        // Each usage sample is stamped five seconds after the task's start time.
        long firstSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5;
        long secondSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5;
        long thirdSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5;

        // task 1 using 3 cpus
        MesosTaskMonitorObject firstTaskUsage = getTaskMonitor(firstTask.getId(), 15, firstSampleSeconds, 1024);
        // task 2 using 2 cpus
        MesosTaskMonitorObject secondTaskUsage = getTaskMonitor(secondTask.getId(), 10, secondSampleSeconds, 1024);
        // task 3 using 1 cpus
        MesosTaskMonitorObject thirdTaskUsage = getTaskMonitor(thirdTask.getId(), 5, thirdSampleSeconds, 1024);

        mesosClient.setAgentResourceUsage("host1", Arrays.asList(firstTaskUsage, secondTaskUsage, thirdTaskUsage));
        mesosClient.setAgentMetricsSnapshot("host1", new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 200000, 0, 30000, 0, 0, 0, 15, 0, 0, 0, 0));

        usagePoller.runActionOnPoll();

        // Tasks 1 and 3 have not been running long enough, so neither gets a cleanup.
        Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
        Assertions.assertFalse(taskManager.getTaskCleanup(thirdTask.getId()).isPresent());
        // Task 2 is not the worst offender, but it is the only one that has run long enough.
        Assertions.assertEquals(TaskCleanupType.REBALANCE_CPU_USAGE, taskManager.getTaskCleanup(secondTask.getId()).get().getCleanupType());
    } finally {
        configuration.setShuffleTasksForOverloadedAgents(false);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) Test(org.junit.jupiter.api.Test)

Example 10 with SingularityAgentUsage

Use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.

From the class SingularityUsageTest, method itWillShuffleToDesiredMemoryThresholdIfPossibleForMemoryShuffle.

/**
 * With two shuffles allowed per host and a 0.90 memory utilization threshold, expects the
 * memory shuffle to clean up tasks 2 and 4 while leaving tasks 1 and 3 in place.
 *
 * <p>Four single-instance requests are scheduled onto "host1"; their reported memory values are
 * 87000, 9000, 1100 and 1000 respectively.
 */
@Test
public void itWillShuffleToDesiredMemoryThresholdIfPossibleForMemoryShuffle() {
    try {
        configuration.setShuffleTasksForOverloadedAgents(true);
        configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
        configuration.setMaxTasksToShufflePerHost(2);
        configuration.setMaxTasksToShuffleTotal(5);
        configuration.setShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds(0.90);
        String t1id = "test-request-1";
        String t2id = "test-request-2";
        String t3id = "test-request-3";
        String t4id = "test-request-4";
        scheduleTask(t1id, 1, 10);
        scheduleTask(t2id, 1, 10);
        scheduleTask(t3id, 1, 10);
        scheduleTask(t4id, 1, 10);
        // A single offer on host1 so that all four tasks are looked up on the same host below.
        sms.resourceOffers(ImmutableList.of(createOffer(10, 100000, 100000, "agent1", "host1"))).join();
        Map<String, Map<String, SingularityTaskId>> taskIdMap = getTaskIdMapByHostByRequest();
        SingularityTaskId task1 = taskIdMap.get("host1").get(t1id);
        SingularityTaskId task2 = taskIdMap.get("host1").get(t2id);
        SingularityTaskId task3 = taskIdMap.get("host1").get(t3id);
        SingularityTaskId task4 = taskIdMap.get("host1").get(t4id);
        startTask(task1);
        startTask(task2);
        startTask(task3);
        startTask(task4);
        // not actually necessary to trigger shuffle, but worth leaving in case that changes
        SingularityAgentUsage highMemUsage = new SingularityAgentUsage(1, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 100000, 1000, 10, 10, 10, 10, 0, 107374182);
        usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(highMemUsage, "host1"));
        // Per-task usage samples, stamped five seconds after each task's start time.
        MesosTaskMonitorObject t1u1 = getTaskMonitor(task1.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task1.getStartedAt()) + 5, 87000);
        MesosTaskMonitorObject t2u1 = getTaskMonitor(task2.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task2.getStartedAt()) + 5, 9000);
        MesosTaskMonitorObject t3u1 = getTaskMonitor(task3.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task3.getStartedAt()) + 5, 1100);
        MesosTaskMonitorObject t4u1 = getTaskMonitor(task4.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task4.getStartedAt()) + 5, 1000);
        mesosClient.setAgentResourceUsage("host1", Arrays.asList(t1u1, t2u1, t3u1, t4u1));
        mesosClient.setAgentMetricsSnapshot("host1", new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 100000, 0, 1000, 0, 0, 0, 10, 0, 0, 0, 0));
        usagePoller.runActionOnPoll();
        // First task is not cleaned up, due to relatively high utilization.
        Assertions.assertFalse(taskManager.getTaskCleanup(task1.getId()).isPresent());
        // Second task is cleaned up, in order to reach desired memory threshold.
        Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(task2.getId()).get().getCleanupType());
        // Third task is not cleaned up, as doing so will not reach the desired memory threshold given available shuffles.
        Assertions.assertFalse(taskManager.getTaskCleanup(task3.getId()).isPresent());
        // Fourth task is cleaned up, as it has the lowest utilization.
        Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(task4.getId()).get().getCleanupType());
    } finally {
        // Restore the shuffle flag that this test switched on.
        configuration.setShuffleTasksForOverloadedAgents(false);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) HashMap(java.util.HashMap) Map(java.util.Map) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) Test(org.junit.jupiter.api.Test)

Aggregations

SingularityAgentUsage (com.hubspot.singularity.SingularityAgentUsage) 17, MesosTaskMonitorObject (com.hubspot.mesos.json.MesosTaskMonitorObject) 14, SingularityAgentUsageWithId (com.hubspot.singularity.SingularityAgentUsageWithId) 14, SingularityTaskId (com.hubspot.singularity.SingularityTaskId) 14, Test (org.junit.jupiter.api.Test) 13, MesosAgentMetricsSnapshotObject (com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) 12, HashMap (java.util.HashMap) 6, Map (java.util.Map) 6, ArrayList (java.util.ArrayList) 3, AtomicDouble (com.google.common.util.concurrent.AtomicDouble) 2, SingularityScaleRequest (com.hubspot.singularity.api.SingularityScaleRequest) 2, List (java.util.List) 2, AtomicLong (java.util.concurrent.atomic.AtomicLong) 2, Offer (org.apache.mesos.v1.Protos.Offer) 2, Inject (com.google.inject.Inject) 1, Resources (com.hubspot.mesos.Resources) 1, InvalidSingularityTaskIdException (com.hubspot.singularity.InvalidSingularityTaskIdException) 1, RequestUtilization (com.hubspot.singularity.RequestUtilization) 1, SingularityPendingRequest (com.hubspot.singularity.SingularityPendingRequest) 1, PendingType (com.hubspot.singularity.SingularityPendingRequest.PendingType) 1