Search in sources :

Example 1 with MesosAgentMetricsSnapshotObject

use of com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject in project Singularity by HubSpot.

the class SingularityUsageHelper method collectAgentUsage.

/**
 * Polls one agent for per-task resource usage and system metrics, stores the results, and adds
 * the agent's figures to the supplied cluster-wide accumulators.
 *
 * <p>For every task reported by the agent that belongs to this framework, the latest usage is
 * saved, reserved resources are summed, request utilization is updated, and tasks with a
 * positive current CPU usage that are eligible for shuffling are collected. If the agent looks
 * overloaded on CPU or memory it is recorded in {@code overLoadedHosts} together with those
 * candidate tasks.
 *
 * <p>Any {@link Throwable} raised while processing is logged and forwarded to the exception
 * notifier; it is not rethrown.
 *
 * @param agent agent whose host is queried
 * @param now timestamp recorded on the resulting {@code SingularityAgentUsage}
 * @param utilizationPerRequestId per-request utilization map handed to {@code updateRequestUtilization}
 * @param previousUtilizations earlier utilizations, looked up by request id
 * @param overLoadedHosts receives this agent's usage and shuffle candidates when it is overloaded
 * @param totalMemBytesUsed accumulator for memory used
 * @param totalMemBytesAvailable accumulator for memory available
 * @param totalCpuUsed accumulator for cpus used
 * @param totalCpuAvailable accumulator for cpus available
 * @param totalDiskBytesUsed accumulator for disk used
 * @param totalDiskBytesAvailable accumulator for disk available
 * @param useShortTimeout forwarded to {@code mesosClient.getAgentResourceUsage}
 */
public void collectAgentUsage(SingularityAgent agent, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<String, RequestUtilization> previousUtilizations, Map<SingularityAgentUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable, boolean useShortTimeout) {
    Optional<Long> memoryMbTotal = Optional.empty();
    Optional<Double> cpusTotal = Optional.empty();
    Optional<Long> diskMbTotal = Optional.empty();
    long memoryMbReserved = 0;
    double cpuReserved = 0;
    long diskMbReserved = 0;
    long memoryBytesUsed = 0;
    double cpusUsed = 0;
    long diskMbUsed = 0;
    try {
        List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getAgentResourceUsage(agent.getHost(), useShortTimeout);
        MesosAgentMetricsSnapshotObject agentMetricsSnapshot = mesosClient.getAgentMetricsSnapshot(agent.getHost());
        // System metrics default to 0 when no snapshot is available.
        double systemMemTotalBytes = 0;
        double systemMemFreeBytes = 0;
        double systemLoad1Min = 0;
        double systemLoad5Min = 0;
        double systemLoad15Min = 0;
        double diskUsed = 0;
        double diskTotal = 0;
        double systemCpusTotal = 0;
        if (agentMetricsSnapshot != null) {
            systemMemTotalBytes = agentMetricsSnapshot.getSystemMemTotalBytes();
            systemMemFreeBytes = agentMetricsSnapshot.getSystemMemFreeBytes();
            systemLoad1Min = agentMetricsSnapshot.getSystemLoad1Min();
            systemLoad5Min = agentMetricsSnapshot.getSystemLoad5Min();
            systemLoad15Min = agentMetricsSnapshot.getSystemLoad15Min();
            diskUsed = agentMetricsSnapshot.getDiskUsed();
            diskTotal = agentMetricsSnapshot.getDiskTotal();
            systemCpusTotal = agentMetricsSnapshot.getSystemCpusTotal();
        }
        // Pick the load average configured for scoring; the 5 minute value is the fallback.
        double systemLoad;
        switch(configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
            case LOAD_1:
                systemLoad = systemLoad1Min;
                break;
            case LOAD_15:
                systemLoad = systemLoad15Min;
                break;
            case LOAD_5:
            default:
                systemLoad = systemLoad5Min;
                break;
        }
        boolean overloadedForCpu = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
        // Guard the division explicitly (mirrors the cpu check) instead of relying on NaN comparing false.
        boolean experiencingHighMemUsage = systemMemTotalBytes > 0 && ((systemMemTotalBytes - systemMemFreeBytes) / systemMemTotalBytes) > configuration.getShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds();
        List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();
        Set<String> shuffleBlacklist = new HashSet<>(shuffleConfigurationManager.getShuffleBlocklist());
        for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
            if (!taskUsage.getFrameworkId().equals(configuration.getMesosConfiguration().getFrameworkId())) {
                LOG.info("Skipping task {} from other framework {}", taskUsage.getSource(), taskUsage.getFrameworkId());
                continue;
            }
            String taskId = taskUsage.getSource();
            SingularityTaskId task;
            try {
                task = SingularityTaskId.valueOf(taskId);
            } catch (InvalidSingularityTaskIdException e) {
                LOG.warn("Couldn't get SingularityTaskId for {}", taskUsage);
                continue;
            }
            SingularityTaskUsage latestUsage = getUsage(taskUsage);
            // Read the history before saving so the newest sample is not compared with itself.
            List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(task);
            usageManager.saveSpecificTaskUsage(task, latestUsage);
            Optional<SingularityTask> maybeTask = taskManager.getTask(task);
            Optional<Resources> maybeResources = Optional.empty();
            if (maybeTask.isPresent()) {
                // Resources on the pending task take precedence over the deploy's resources.
                maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().isPresent() ? maybeTask.get().getTaskRequest().getPendingTask().getResources() : maybeTask.get().getTaskRequest().getDeploy().getResources();
                if (maybeResources.isPresent()) {
                    Resources taskResources = maybeResources.get();
                    double memoryMbReservedForTask = taskResources.getMemoryMb();
                    double cpuReservedForTask = taskResources.getCpus();
                    double diskMbReservedForTask = taskResources.getDiskMb();
                    memoryMbReserved += memoryMbReservedForTask;
                    cpuReserved += cpuReservedForTask;
                    diskMbReserved += diskMbReservedForTask;
                    updateRequestUtilization(utilizationPerRequestId, previousUtilizations.get(maybeTask.get().getTaskRequest().getRequest().getId()), pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
                }
            }
            memoryBytesUsed += latestUsage.getMemoryTotalBytes();
            diskMbUsed += latestUsage.getDiskTotalBytes();
            SingularityTaskCurrentUsage currentUsage = null;
            if (pastTaskUsages.isEmpty()) {
                // No earlier sample: average cpu over the time since the TASK_STARTING update.
                Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
                if (maybeStartingUpdate.isPresent()) {
                    long startTimestamp = maybeStartingUpdate.get().getTimestamp();
                    long secondsSinceStart = TimeUnit.MILLISECONDS.toSeconds(latestUsage.getTimestamp() - startTimestamp);
                    // toSeconds truncates, so a sub-second (or negative) interval would divide by zero;
                    // report 0 rather than letting Infinity/NaN flow into the totals.
                    double usedCpusSinceStart = secondsSinceStart > 0 ? latestUsage.getCpuSeconds() / secondsSinceStart : 0;
                    currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), (long) taskUsage.getStatistics().getTimestamp() * 1000, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
                    cpusUsed += usedCpusSinceStart;
                }
            } else {
                // Otherwise use the cpu-seconds delta against the most recent stored sample.
                SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
                long secondsSinceLastUsage = TimeUnit.MILLISECONDS.toSeconds(latestUsage.getTimestamp() - lastUsage.getTimestamp());
                // Same zero-interval guard as above.
                double taskCpusUsed = secondsSinceLastUsage > 0 ? (latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / secondsSinceLastUsage : 0;
                currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), (long) taskUsage.getStatistics().getTimestamp() * 1000, taskCpusUsed, latestUsage.getDiskTotalBytes());
                cpusUsed += taskCpusUsed;
            }
            if (currentUsage != null && currentUsage.getCpusUsed() > 0) {
                if (isEligibleForShuffle(task, shuffleBlacklist)) {
                    Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
                    if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
                        LOG.trace("Task {} already being cleaned up to spread cpu or mem usage, skipping", taskId);
                    } else {
                        if (maybeResources.isPresent()) {
                            possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
                        }
                    }
                }
            }
        }
        // All three totals are dereferenced below, so all three must be present (disk included).
        if (!agent.getResources().isPresent() || !agent.getResources().get().getMemoryMegaBytes().isPresent() || !agent.getResources().get().getNumCpus().isPresent() || !agent.getResources().get().getDiskSpace().isPresent()) {
            LOG.debug("Could not find agent or resources for agent {}", agent.getId());
        } else {
            memoryMbTotal = Optional.of(agent.getResources().get().getMemoryMegaBytes().get().longValue());
            cpusTotal = Optional.of(agent.getResources().get().getNumCpus().get().doubleValue());
            diskMbTotal = Optional.of(agent.getResources().get().getDiskSpace().get());
        }
        SingularityAgentUsage agentUsage = new SingularityAgentUsage(cpusUsed, cpuReserved, cpusTotal, memoryBytesUsed, memoryMbReserved, memoryMbTotal, diskMbUsed, diskMbReserved, diskMbTotal, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, diskUsed, diskTotal);
        if (overloadedForCpu || experiencingHighMemUsage) {
            overLoadedHosts.put(agentUsage, possibleTasksToShuffle);
        }
        // Only agents with known totals contribute to the cluster-wide sums.
        if (agentUsage.getMemoryBytesTotal().isPresent() && agentUsage.getCpusTotal().isPresent() && agentUsage.getDiskBytesTotal().isPresent()) {
            totalMemBytesUsed.getAndAdd((long) agentUsage.getMemoryBytesUsed());
            totalCpuUsed.getAndAdd(agentUsage.getCpusUsed());
            totalDiskBytesUsed.getAndAdd((long) agentUsage.getDiskBytesUsed());
            totalMemBytesAvailable.getAndAdd(agentUsage.getMemoryBytesTotal().get());
            totalCpuAvailable.getAndAdd(agentUsage.getCpusTotal().get());
            totalDiskBytesAvailable.getAndAdd(agentUsage.getDiskBytesTotal().get());
        }
        LOG.debug("Saving agent {} usage {}", agent.getHost(), agentUsage);
        usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(agentUsage, agent.getId()));
    } catch (Throwable t) {
        String message = String.format("Could not get agent usage for host %s", agent.getHost());
        LOG.error(message, t);
        exceptionNotifier.notify(message, t);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) ArrayList(java.util.ArrayList) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) HashSet(java.util.HashSet) SingularityTaskUsage(com.hubspot.singularity.SingularityTaskUsage) SingularityTaskCurrentUsage(com.hubspot.singularity.SingularityTaskCurrentUsage) AtomicDouble(com.google.common.util.concurrent.AtomicDouble) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityTask(com.hubspot.singularity.SingularityTask) AtomicLong(java.util.concurrent.atomic.AtomicLong) Resources(com.hubspot.mesos.Resources) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId)

Example 2 with MesosAgentMetricsSnapshotObject

use of com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject in project Singularity by HubSpot.

the class SingularityMesosOfferScheduler method checkOffers.

/**
 * Matches due task requests against the supplied offers and accepts the best-scoring offer for
 * each task.
 *
 * <p>Offers are grouped per agent into {@code SingularityOfferHolder}s. Stored agent usages are
 * optionally re-validated against a fresh metrics snapshot, scores are computed per agent, and
 * then each request's pending tasks are evaluated under the request lock. Actual offer
 * evaluation is serialized by a local lock so one offer is not over-committed.
 *
 * @param offers offers to evaluate; an empty map short-circuits to an empty result
 * @param start timestamp used only to log how long usage and score collection took
 * @return the offer holders built from {@code offers}, one per agent id
 */
Collection<SingularityOfferHolder> checkOffers(final Map<String, Offer> offers, long start) {
    if (offers.isEmpty()) {
        LOG.debug("No offers to check");
        return Collections.emptyList();
    }
    final List<SingularityTaskRequestHolder> sortedTaskRequestHolders = getSortedDueTaskRequests();
    final int numDueTasks = sortedTaskRequestHolders.size();
    // One holder per agent; agent-level attributes are read from the first offer of each group.
    final Map<String, SingularityOfferHolder> offerHolders = offers.values().stream().collect(Collectors.groupingBy(o -> o.getAgentId().getValue())).entrySet().stream().filter(e -> e.getValue().size() > 0).map(e -> {
        List<Offer> offersList = e.getValue();
        String agentId = e.getKey();
        return new SingularityOfferHolder(offersList, numDueTasks, agentAndRackHelper.getRackIdOrDefault(offersList.get(0)), agentId, offersList.get(0).getHostname(), agentAndRackHelper.getTextAttributes(offersList.get(0)), agentAndRackHelper.getReservedAgentAttributes(offersList.get(0)));
    }).collect(Collectors.toMap(SingularityOfferHolder::getAgentId, Function.identity()));
    if (sortedTaskRequestHolders.isEmpty()) {
        return offerHolders.values();
    }
    final AtomicInteger tasksScheduled = new AtomicInteger(0);
    Map<String, RequestUtilization> requestUtilizations = usageManager.getRequestUtilizations(false);
    List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIds();
    Map<String, SingularityAgentUsageWithId> currentUsages = usageManager.getAllCurrentAgentUsage();
    List<CompletableFuture<Void>> currentUsagesFutures = new ArrayList<>();
    for (SingularityOfferHolder offerHolder : offerHolders.values()) {
        // NOTE(review): currentUsages is mutated from these async tasks; confirm that the map
        // returned by usageManager.getAllCurrentAgentUsage() is safe for concurrent removal.
        currentUsagesFutures.add(runAsync(() -> {
            String agentId = offerHolder.getAgentId();
            Optional<SingularityAgentUsageWithId> maybeUsage = Optional.ofNullable(currentUsages.get(agentId));
            if (configuration.isReCheckMetricsForLargeNewTaskCount() && maybeUsage.isPresent()) {
                // Count tasks started on this host after the stored usage was recorded.
                long newTaskCount = taskManager.getActiveTaskIds().stream().filter(t -> t.getStartedAt() > maybeUsage.get().getTimestamp() && t.getSanitizedHost().equals(offerHolder.getSanitizedHost())).count();
                // NOTE(review): if getNumTasks() is integral, "/ 2" truncates, so agents with 0 or 1
                // recorded tasks always take this branch — confirm that is intended.
                if (newTaskCount >= maybeUsage.get().getNumTasks() / 2) {
                    try {
                        MesosAgentMetricsSnapshotObject metricsSnapshot = usageHelper.getMetricsSnapshot(offerHolder.getHostname());
                        // Compare each load average against its own threshold (load1 with the load1
                        // threshold, load5 with the load5 threshold); the pairs were previously swapped.
                        if (metricsSnapshot.getSystemLoad1Min() / metricsSnapshot.getSystemCpusTotal() > mesosConfiguration.getRecheckMetricsLoad1Threshold() || metricsSnapshot.getSystemLoad5Min() / metricsSnapshot.getSystemCpusTotal() > mesosConfiguration.getRecheckMetricsLoad5Threshold()) {
                            // Come back to this agent after we have collected more metrics
                            LOG.info("Skipping evaluation of {} until new metrics are collected. Current load is load1: {}, load5: {}", offerHolder.getHostname(), metricsSnapshot.getSystemLoad1Min(), metricsSnapshot.getSystemLoad5Min());
                            currentUsages.remove(agentId);
                        }
                    } catch (Throwable t) {
                        // Pass the throwable as the last argument so the cause is logged, not lost.
                        LOG.warn("Could not check metrics for host {}, skipping", offerHolder.getHostname(), t);
                        currentUsages.remove(agentId);
                    }
                }
            }
        }));
    }
    CompletableFutures.allOf(currentUsagesFutures).join();
    List<CompletableFuture<Void>> usagesWithScoresFutures = new ArrayList<>();
    Map<String, SingularityAgentUsageWithCalculatedScores> currentUsagesById = new ConcurrentHashMap<>();
    // Compute scores only for agents that still have a usage entry and an offer.
    for (SingularityAgentUsageWithId usage : currentUsages.values()) {
        if (offerHolders.containsKey(usage.getAgentId())) {
            usagesWithScoresFutures.add(runAsync(() -> currentUsagesById.put(usage.getAgentId(), new SingularityAgentUsageWithCalculatedScores(usage, mesosConfiguration.getScoreUsingSystemLoad(), getMaxProbableUsageForAgent(activeTaskIds, requestUtilizations, offerHolders.get(usage.getAgentId()).getSanitizedHost()), mesosConfiguration.getLoad5OverloadedThreshold(), mesosConfiguration.getLoad1OverloadedThreshold(), usage.getTimestamp()))));
        }
    }
    CompletableFutures.allOf(usagesWithScoresFutures).join();
    long startCheck = System.currentTimeMillis();
    LOG.debug("Found agent usages and scores after {}ms", startCheck - start);
    Map<SingularityDeployKey, Optional<SingularityDeployStatistics>> deployStatsCache = new ConcurrentHashMap<>();
    Set<String> overloadedHosts = Sets.newConcurrentHashSet();
    AtomicInteger noMatches = new AtomicInteger();
    // We spend much of the offer check loop for request level locks. Wait for the locks in parallel, but ensure that actual offer checks
    // are done in serial to not over commit a single offer
    ReentrantLock offerCheckTempLock = new ReentrantLock(false);
    CompletableFutures.allOf(sortedTaskRequestHolders.stream().collect(Collectors.groupingBy(t -> t.getTaskRequest().getRequest().getId())).entrySet().stream().map(entry -> runAsync(() -> {
        lock.tryRunWithRequestLock(() -> {
            offerCheckTempLock.lock();
            try {
                long startRequest = System.currentTimeMillis();
                int evaluated = 0;
                for (SingularityTaskRequestHolder taskRequestHolder : entry.getValue()) {
                    long now = System.currentTimeMillis();
                    // Stop early if the whole loop, or this request after more than one evaluated task, runs too long.
                    boolean isOfferLoopTakingTooLong = now - startCheck > mesosConfiguration.getOfferLoopTimeoutMillis();
                    boolean isRequestInOfferLoopTakingTooLong = (now - startRequest > mesosConfiguration.getOfferLoopRequestTimeoutMillis() && evaluated > 1);
                    if (isOfferLoopTakingTooLong || isRequestInOfferLoopTakingTooLong) {
                        LOG.warn("{} is holding the offer lock for too long, skipping remaining {} tasks for scheduling", taskRequestHolder.getTaskRequest().getRequest().getId(), entry.getValue().size() - evaluated);
                        break;
                    }
                    evaluated++;
                    List<SingularityTaskId> activeTaskIdsForRequest = leaderCache.getActiveTaskIdsForRequest(taskRequestHolder.getTaskRequest().getRequest().getId());
                    if (isTooManyInstancesForRequest(taskRequestHolder.getTaskRequest(), activeTaskIdsForRequest)) {
                        LOG.debug("Skipping pending task {}, too many instances already running", taskRequestHolder.getTaskRequest().getPendingTask().getPendingTaskId());
                        continue;
                    }
                    Map<String, Double> scorePerOffer = new ConcurrentHashMap<>();
                    for (SingularityOfferHolder offerHolder : offerHolders.values()) {
                        if (!isOfferFull(offerHolder)) {
                            // Stop scanning offers as soon as one scores above the "good enough" threshold.
                            if (calculateScore(requestUtilizations, currentUsagesById, taskRequestHolder, scorePerOffer, activeTaskIdsForRequest, offerHolder, deployStatsCache, overloadedHosts) > mesosConfiguration.getGoodEnoughScoreThreshold()) {
                                break;
                            }
                        }
                    }
                    if (!scorePerOffer.isEmpty()) {
                        // Accept the highest-scoring offer and refresh that agent's usage scores.
                        SingularityOfferHolder bestOffer = offerHolders.get(Collections.max(scorePerOffer.entrySet(), Map.Entry.comparingByValue()).getKey());
                        LOG.info("Best offer {}/1 is on {}", scorePerOffer.get(bestOffer.getAgentId()), bestOffer.getSanitizedHost());
                        acceptTask(bestOffer, taskRequestHolder);
                        tasksScheduled.getAndIncrement();
                        updateAgentUsageScores(taskRequestHolder, currentUsagesById, bestOffer.getAgentId(), requestUtilizations);
                    } else {
                        noMatches.getAndIncrement();
                    }
                }
            } finally {
                offerCheckTempLock.unlock();
            }
        }, entry.getKey(), String.format("%s#%s", getClass().getSimpleName(), "checkOffers"), mesosConfiguration.getOfferLoopRequestTimeoutMillis(), TimeUnit.MILLISECONDS);
    })).collect(Collectors.toList())).join();
    LOG.info("{} tasks scheduled, {} tasks remaining after examining {} offers ({} overloaded hosts, {} had no offer matches)", tasksScheduled, numDueTasks - tasksScheduled.get(), offers.size(), overloadedHosts.size(), noMatches.get());
    return offerHolders.values();
}
Also used : CachedOffer(com.hubspot.singularity.mesos.SingularityOfferCache.CachedOffer) SingularityTask(com.hubspot.singularity.SingularityTask) DeployManager(com.hubspot.singularity.data.DeployManager) MaxProbableUsage(com.hubspot.singularity.mesos.SingularityAgentUsageWithCalculatedScores.MaxProbableUsage) Inject(com.google.inject.Inject) LoggerFactory(org.slf4j.LoggerFactory) Offer(org.apache.mesos.v1.Protos.Offer) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Resources(com.hubspot.mesos.Resources) TaskManager(com.hubspot.singularity.data.TaskManager) SingularityUsageHelper(com.hubspot.singularity.scheduler.SingularityUsageHelper) SingularityPendingTaskId(com.hubspot.singularity.SingularityPendingTaskId) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) CustomExecutorConfiguration(com.hubspot.singularity.config.CustomExecutorConfiguration) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CompletableFutures(com.hubspot.singularity.async.CompletableFutures) Set(java.util.Set) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) JavaUtils(com.hubspot.mesos.JavaUtils) List(java.util.List) MesosUtils(com.hubspot.singularity.helpers.MesosUtils) RequestUtilization(com.hubspot.singularity.RequestUtilization) UsageManager(com.hubspot.singularity.data.usage.UsageManager) Optional(java.util.Optional) CheckResult(com.hubspot.singularity.mesos.SingularityAgentAndRackManager.CheckResult) AgentMatchState(com.hubspot.singularity.AgentMatchState) SingularityLeaderCache(com.hubspot.singularity.scheduler.SingularityLeaderCache) DisasterManager(com.hubspot.singularity.data.DisasterManager) 
MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) SingularityScheduler(com.hubspot.singularity.scheduler.SingularityScheduler) CompletableFuture(java.util.concurrent.CompletableFuture) Singleton(javax.inject.Singleton) Function(java.util.function.Function) ArrayList(java.util.ArrayList) SingularityDeployKey(com.hubspot.singularity.SingularityDeployKey) RequestType(com.hubspot.singularity.RequestType) ExecutorService(java.util.concurrent.ExecutorService) SingularityConfiguration(com.hubspot.singularity.config.SingularityConfiguration) SingularityAction(com.hubspot.singularity.SingularityAction) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) OfferID(org.apache.mesos.v1.Protos.OfferID) SingularityMesosTaskHolder(com.hubspot.singularity.helpers.SingularityMesosTaskHolder) TimeUnit(java.util.concurrent.TimeUnit) SingularityTaskRequest(com.hubspot.singularity.SingularityTaskRequest) SingularityManagedThreadPoolFactory(com.hubspot.singularity.SingularityManagedThreadPoolFactory) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MesosConfiguration(com.hubspot.singularity.config.MesosConfiguration) Collections(java.util.Collections) RequestUtilization(com.hubspot.singularity.RequestUtilization) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) List(java.util.List) ArrayList(java.util.ArrayList) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Optional(java.util.Optional) SingularityDeployKey(com.hubspot.singularity.SingularityDeployKey) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId)

Example 3 with MesosAgentMetricsSnapshotObject

use of com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject in project Singularity by HubSpot.

the class SingularityUsageTest method itDoesNotShuffleBlacklistedTasks.

/**
 * Puts the request on the shuffle blocklist, reports an overloaded host, runs the usage poller,
 * and checks that no cleanup is created for the first two tasks.
 */
@Test
public void itDoesNotShuffleBlacklistedTasks() {
    try {
        configuration.setShuffleTasksForOverloadedAgents(true);
        configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
        shuffleCfgManager.addToShuffleBlocklist(requestId);
        initRequest();
        initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
        resourceOffers(1);

        // Seed a stored usage for host1 before the poller runs.
        SingularityAgentUsage overloadedUsage = new SingularityAgentUsage(15, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 200000, 30000, 10, 15, 15, 15, 0, 107374182);
        SingularityAgentUsageWithId overloadedUsageForHost = new SingularityAgentUsageWithId(overloadedUsage, "host1");
        usageManager.saveCurrentAgentUsage(overloadedUsageForHost);

        SingularityTaskId firstTask = taskManager.getActiveTaskIds().get(0);
        String firstId = firstTask.getId();
        SingularityTaskId secondTask = taskManager.getActiveTaskIds().get(1);
        String secondId = secondTask.getId();
        SingularityTaskId thirdTask = taskManager.getActiveTaskIds().get(2);
        String thirdId = thirdTask.getId();

        statusUpdate(taskManager.getTask(firstTask).get(), TaskState.TASK_STARTING, Optional.of(firstTask.getStartedAt()));
        statusUpdate(taskManager.getTask(secondTask).get(), TaskState.TASK_STARTING, Optional.of(secondTask.getStartedAt()));
        statusUpdate(taskManager.getTask(thirdTask).get(), TaskState.TASK_STARTING, Optional.of(thirdTask.getStartedAt()));

        // Each sample is stamped 5 seconds after the task's start time.
        // task 1 using 3 cpus
        long firstSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5;
        MesosTaskMonitorObject firstMonitor = getTaskMonitor(firstId, 15, firstSampleSeconds, 1024);
        // task 2 using 2 cpus
        long secondSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5;
        MesosTaskMonitorObject secondMonitor = getTaskMonitor(secondId, 10, secondSampleSeconds, 1024);
        // task 3 using 1 cpus
        long thirdSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5;
        MesosTaskMonitorObject thirdMonitor = getTaskMonitor(thirdId, 5, thirdSampleSeconds, 1024);

        mesosClient.setAgentResourceUsage("host1", Arrays.asList(firstMonitor, secondMonitor, thirdMonitor));
        MesosAgentMetricsSnapshotObject host1Snapshot = new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 200000, 0, 30000, 0, 0, 0, 15, 0, 0, 0, 0);
        mesosClient.setAgentMetricsSnapshot("host1", host1Snapshot);

        usagePoller.runActionOnPoll();

        // The request is on the blocklist, so task 1 must not have a cleanup
        Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
        // Task 2 belongs to the same request as task 1, so it must not have a cleanup either
        Assertions.assertFalse(taskManager.getTaskCleanup(secondTask.getId()).isPresent());
    } finally {
        configuration.setShuffleTasksForOverloadedAgents(false);
        shuffleCfgManager.removeFromShuffleBlocklist(requestId);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) Test(org.junit.jupiter.api.Test)

Example 4 with MesosAgentMetricsSnapshotObject

use of com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject in project Singularity by HubSpot.

the class SingularityUsageTest method itCreatesTaskCleanupsWhenAMachineIsOverloadedOnMemory.

/**
 * Reports a host whose free memory is low relative to its total, runs the usage poller with a
 * 0.90 memory-utilization shuffle threshold, and checks that only the third task receives a
 * {@code REBALANCE_MEMORY_USAGE} cleanup.
 */
@Test
public void itCreatesTaskCleanupsWhenAMachineIsOverloadedOnMemory() {
    try {
        configuration.setShuffleTasksForOverloadedAgents(true);
        configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
        configuration.setShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds(0.90);
        initRequest();
        initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
        resourceOffers(1);

        // Seed a stored usage for host1 before the poller runs.
        SingularityAgentUsage memoryHeavyUsage = new SingularityAgentUsage(10, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 200000, 10000, 10, 10, 10, 10, 0, 107374182);
        SingularityAgentUsageWithId memoryHeavyUsageForHost = new SingularityAgentUsageWithId(memoryHeavyUsage, "host1");
        usageManager.saveCurrentAgentUsage(memoryHeavyUsageForHost);

        SingularityTaskId firstTask = taskManager.getActiveTaskIds().get(0);
        String firstId = firstTask.getId();
        SingularityTaskId secondTask = taskManager.getActiveTaskIds().get(1);
        String secondId = secondTask.getId();
        SingularityTaskId thirdTask = taskManager.getActiveTaskIds().get(2);
        String thirdId = thirdTask.getId();

        statusUpdate(taskManager.getTask(firstTask).get(), TaskState.TASK_STARTING, Optional.of(firstTask.getStartedAt()));
        statusUpdate(taskManager.getTask(secondTask).get(), TaskState.TASK_STARTING, Optional.of(secondTask.getStartedAt()));
        statusUpdate(taskManager.getTask(thirdTask).get(), TaskState.TASK_STARTING, Optional.of(thirdTask.getStartedAt()));

        // Each sample is stamped 5 seconds after the task's start time.
        // task 1 reports the largest memory figure of the three
        long firstSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5;
        MesosTaskMonitorObject firstMonitor = getTaskMonitor(firstId, 2, firstSampleSeconds, 95000);
        // task 2 reports the middle memory figure
        long secondSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5;
        MesosTaskMonitorObject secondMonitor = getTaskMonitor(secondId, 5, secondSampleSeconds, 63333);
        // task 3 reports the smallest memory figure
        long thirdSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5;
        MesosTaskMonitorObject thirdMonitor = getTaskMonitor(thirdId, 5, thirdSampleSeconds, 31667);

        mesosClient.setAgentResourceUsage("host1", Arrays.asList(firstMonitor, secondMonitor, thirdMonitor));
        MesosAgentMetricsSnapshotObject host1Snapshot = new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 200000, 0, 10000, 0, 0, 0, 10, 0, 0, 0, 0);
        mesosClient.setAgentMetricsSnapshot("host1", host1Snapshot);

        usagePoller.runActionOnPoll();

        // Task 1 uses the most memory and must be left alone
        Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
        // Task 3 uses the least memory and must be cleaned up to rebalance memory
        Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(thirdTask.getId()).get().getCleanupType());
        // Task 2 shares a request with task 3, so it must not be cleaned up as well
        Assertions.assertFalse(taskManager.getTaskCleanup(secondTask.getId()).isPresent());
    } finally {
        configuration.setShuffleTasksForOverloadedAgents(false);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) Test(org.junit.jupiter.api.Test)

Example 5 with MesosAgentMetricsSnapshotObject

use of com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject in project Singularity by HubSpot.

the class SingularityUsageTest method itWillShuffleMultipleTasksIfNecessaryForMemoryShuffle.

/**
 * Schedules one task for each of three requests on the same host, reports low free memory on
 * that host, and checks that with up to two shuffles allowed per host the second and third tasks
 * both receive a {@code REBALANCE_MEMORY_USAGE} cleanup while the first does not.
 */
@Test
public void itWillShuffleMultipleTasksIfNecessaryForMemoryShuffle() {
    try {
        configuration.setShuffleTasksForOverloadedAgents(true);
        configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
        configuration.setMaxTasksToShufflePerHost(2);
        configuration.setMaxTasksToShuffleTotal(5);
        configuration.setShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds(0.90);

        String firstRequestId = "test-request-1";
        String secondRequestId = "test-request-2";
        String thirdRequestId = "test-request-3";
        scheduleTask(firstRequestId, 1, 10);
        scheduleTask(secondRequestId, 1, 10);
        scheduleTask(thirdRequestId, 1, 10);
        sms.resourceOffers(ImmutableList.of(createOffer(10, 100000, 100000, "agent1", "host1"))).join();
        System.out.println(taskManager.getActiveTaskIds());

        // Look up the single task of each request on host1.
        Map<String, Map<String, SingularityTaskId>> tasksByHostByRequest = getTaskIdMapByHostByRequest();
        SingularityTaskId firstTask = tasksByHostByRequest.get("host1").get(firstRequestId);
        SingularityTaskId secondTask = tasksByHostByRequest.get("host1").get(secondRequestId);
        SingularityTaskId thirdTask = tasksByHostByRequest.get("host1").get(thirdRequestId);
        startTask(firstTask);
        startTask(secondTask);
        startTask(thirdTask);

        // not actually necessary to trigger shuffle, but worth leaving in case that changes
        SingularityAgentUsage memoryHeavyUsage = new SingularityAgentUsage(1, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 100000, 1000, 10, 10, 10, 10, 0, 107374182);
        SingularityAgentUsageWithId memoryHeavyUsageForHost = new SingularityAgentUsageWithId(memoryHeavyUsage, "host1");
        usageManager.saveCurrentAgentUsage(memoryHeavyUsageForHost);

        // Each sample is stamped 5 seconds after the task's start time.
        MesosTaskMonitorObject firstMonitor = getTaskMonitor(firstTask.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5, 89000);
        MesosTaskMonitorObject secondMonitor = getTaskMonitor(secondTask.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5, 9000);
        MesosTaskMonitorObject thirdMonitor = getTaskMonitor(thirdTask.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5, 1000);
        mesosClient.setAgentResourceUsage("host1", Arrays.asList(firstMonitor, secondMonitor, thirdMonitor));
        MesosAgentMetricsSnapshotObject host1Snapshot = new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 100000, 0, 1000, 0, 0, 0, 10, 0, 0, 0, 0);
        mesosClient.setAgentMetricsSnapshot("host1", host1Snapshot);

        usagePoller.runActionOnPoll();
        System.out.println(taskManager.getCleanupTaskIds().toString());

        // Task 1 has relatively high utilization and must not be cleaned up.
        Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
        // Task 2 has relatively low utilization and must be cleaned up.
        Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(secondTask.getId()).get().getCleanupType());
        // Task 3 must be cleaned up too, in order to reach the desired memory utilization.
        Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(thirdTask.getId()).get().getCleanupType());
    } finally {
        configuration.setShuffleTasksForOverloadedAgents(false);
    }
}
Also used : SingularityAgentUsage(com.hubspot.singularity.SingularityAgentUsage) MesosAgentMetricsSnapshotObject(com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject) HashMap(java.util.HashMap) Map(java.util.Map) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) SingularityAgentUsageWithId(com.hubspot.singularity.SingularityAgentUsageWithId) Test(org.junit.jupiter.api.Test)

Aggregations

MesosAgentMetricsSnapshotObject (com.hubspot.mesos.json.MesosAgentMetricsSnapshotObject)13 SingularityAgentUsage (com.hubspot.singularity.SingularityAgentUsage)13 SingularityAgentUsageWithId (com.hubspot.singularity.SingularityAgentUsageWithId)13 SingularityTaskId (com.hubspot.singularity.SingularityTaskId)13 MesosTaskMonitorObject (com.hubspot.mesos.json.MesosTaskMonitorObject)12 Test (org.junit.jupiter.api.Test)11 Map (java.util.Map)6 HashMap (java.util.HashMap)4 ArrayList (java.util.ArrayList)3 Resources (com.hubspot.mesos.Resources)2 SingularityTask (com.hubspot.singularity.SingularityTask)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Sets (com.google.common.collect.Sets)1 AtomicDouble (com.google.common.util.concurrent.AtomicDouble)1 Inject (com.google.inject.Inject)1 JavaUtils (com.hubspot.mesos.JavaUtils)1 AgentMatchState (com.hubspot.singularity.AgentMatchState)1 InvalidSingularityTaskIdException (com.hubspot.singularity.InvalidSingularityTaskIdException)1 RequestType (com.hubspot.singularity.RequestType)1 RequestUtilization (com.hubspot.singularity.RequestUtilization)1