Search in sources :

Example 1 with TaskID

use of org.apache.mesos.v1.Protos.TaskID in project Singularity by HubSpot.

the class SingularityMesosOfferScheduler method checkOffers.

public Collection<SingularityOfferHolder> checkOffers(final Collection<Offer> offers) {
    for (SingularityPendingTaskId taskId : taskManager.getPendingTasksMarkedForDeletion()) {
        lock.runWithRequestLock(() -> taskManager.deletePendingTask(taskId), taskId.getRequestId(), String.format("%s#%s", getClass().getSimpleName(), "checkOffers -> pendingTaskDeletes"));
    }
    scheduler.checkForDecomissions();
    scheduler.drainPendingQueue();
    if (offers.isEmpty()) {
        LOG.debug("No offers to check");
        return Collections.emptyList();
    }
    final List<SingularityTaskRequestHolder> sortedTaskRequestHolders = getSortedDueTaskRequests();
    final int numDueTasks = sortedTaskRequestHolders.size();
    final Map<String, SingularityOfferHolder> offerHolders = offers.stream().collect(Collectors.groupingBy((o) -> o.getAgentId().getValue())).entrySet().stream().filter((e) -> e.getValue().size() > 0).map((e) -> {
        List<Offer> offersList = e.getValue();
        String slaveId = e.getKey();
        return new SingularityOfferHolder(offersList, numDueTasks, slaveAndRackHelper.getRackIdOrDefault(offersList.get(0)), slaveId, offersList.get(0).getHostname(), slaveAndRackHelper.getTextAttributes(offersList.get(0)), slaveAndRackHelper.getReservedSlaveAttributes(offersList.get(0)));
    }).collect(Collectors.toMap(SingularityOfferHolder::getSlaveId, Function.identity()));
    if (sortedTaskRequestHolders.isEmpty()) {
        return offerHolders.values();
    }
    final AtomicInteger tasksScheduled = new AtomicInteger(0);
    Map<String, RequestUtilization> requestUtilizations = usageManager.getRequestUtilizations();
    List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIds();
    final Map<String, SingularitySlaveUsageWithCalculatedScores> currentSlaveUsagesBySlaveId = usageManager.getCurrentSlaveUsages(offerHolders.values().stream().map(SingularityOfferHolder::getSlaveId).collect(Collectors.toList())).parallelStream().collect(Collectors.toMap(SingularitySlaveUsageWithId::getSlaveId, (usageWithId) -> new SingularitySlaveUsageWithCalculatedScores(usageWithId, configuration.getMesosConfiguration().getScoringStrategy(), configuration.getMesosConfiguration().getScoreUsingSystemLoad(), getMaxProbableUsageForSlave(activeTaskIds, requestUtilizations, offerHolders.get(usageWithId.getSlaveId()).getSanitizedHost()))));
    LOG.trace("Found slave usages {}", currentSlaveUsagesBySlaveId);
    Map<String, Integer> tasksPerOfferHost = new ConcurrentHashMap<>();
    for (SingularityTaskRequestHolder taskRequestHolder : sortedTaskRequestHolders) {
        lock.runWithRequestLock(() -> {
            Map<String, Double> scorePerOffer = new ConcurrentHashMap<>();
            List<SingularityTaskId> activeTaskIdsForRequest = leaderCache.getActiveTaskIdsForRequest(taskRequestHolder.getTaskRequest().getRequest().getId());
            List<CompletableFuture<Void>> scoringFutures = new ArrayList<>();
            AtomicReference<Throwable> scoringException = new AtomicReference<>(null);
            for (SingularityOfferHolder offerHolder : offerHolders.values()) {
                if (!isOfferFull(offerHolder)) {
                    scoringFutures.add(offerScoringSemaphore.call(() -> CompletableFuture.runAsync(() -> {
                        try {
                            double score = calculateScore(offerHolder, currentSlaveUsagesBySlaveId, tasksPerOfferHost, taskRequestHolder, activeTaskIdsForRequest);
                            if (score != 0) {
                                scorePerOffer.put(offerHolder.getSlaveId(), score);
                            }
                        } catch (Throwable t) {
                            LOG.error("Uncaught exception while scoring offers", t);
                            scoringException.set(t);
                        }
                    }, offerScoringExecutor)));
                }
            }
            CompletableFutures.allOf(scoringFutures).join();
            if (scoringException.get() != null) {
                LOG.warn("Exception caught in offer scoring futures, semaphore info: (concurrentRequests: {}, queueSize: {})", offerScoringSemaphore.getConcurrentRequests(), offerScoringSemaphore.getQueueSize());
                // This will be caught by either the LeaderOnlyPoller or resourceOffers uncaught exception code, causing an abort
                throw new RuntimeException(scoringException.get());
            }
            if (!scorePerOffer.isEmpty()) {
                SingularityOfferHolder bestOffer = offerHolders.get(Collections.max(scorePerOffer.entrySet(), Map.Entry.comparingByValue()).getKey());
                LOG.info("Best offer {}/1 is on {}", scorePerOffer.get(bestOffer.getSlaveId()), bestOffer.getSanitizedHost());
                SingularityMesosTaskHolder taskHolder = acceptTask(bestOffer, tasksPerOfferHost, taskRequestHolder);
                tasksScheduled.getAndIncrement();
                bestOffer.addMatchedTask(taskHolder);
                updateSlaveUsageScores(taskRequestHolder, currentSlaveUsagesBySlaveId, bestOffer.getSlaveId(), requestUtilizations);
            }
        }, taskRequestHolder.getTaskRequest().getRequest().getId(), String.format("%s#%s", getClass().getSimpleName(), "checkOffers"));
    }
    LOG.info("{} tasks scheduled, {} tasks remaining after examining {} offers", tasksScheduled, numDueTasks - tasksScheduled.get(), offers.size());
    return offerHolders.values();
}
Also used : ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) SingularityTask(com.hubspot.singularity.SingularityTask) DeployManager(com.hubspot.singularity.data.DeployManager) Inject(com.google.inject.Inject) LoggerFactory(org.slf4j.LoggerFactory) CompletableFuture(java.util.concurrent.CompletableFuture) Offer(org.apache.mesos.v1.Protos.Offer) SingularityScheduler(com.hubspot.singularity.scheduler.SingularityScheduler) Singleton(javax.inject.Singleton) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) ArrayList(java.util.ArrayList) SingularityDeployStatistics(com.hubspot.singularity.SingularityDeployStatistics) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Optional(com.google.common.base.Optional) Map(java.util.Map) SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) Resources(com.hubspot.mesos.Resources) RequestType(com.hubspot.singularity.RequestType) TaskManager(com.hubspot.singularity.data.TaskManager) ExecutorService(java.util.concurrent.ExecutorService) SingularityPendingTaskId(com.hubspot.singularity.SingularityPendingTaskId) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityConfiguration(com.hubspot.singularity.config.SingularityConfiguration) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) Logger(org.slf4j.Logger) MaxProbableUsage(com.hubspot.singularity.mesos.SingularitySlaveUsageWithCalculatedScores.MaxProbableUsage) CustomExecutorConfiguration(com.hubspot.singularity.config.CustomExecutorConfiguration) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) CompletableFutures(com.hubspot.singularity.async.CompletableFutures) AsyncSemaphore(com.hubspot.singularity.async.AsyncSemaphore) Collectors(java.util.stream.Collectors) SlaveMatchState(com.hubspot.singularity.SlaveMatchState) Executors(java.util.concurrent.Executors) SingularityMesosTaskHolder(com.hubspot.singularity.helpers.SingularityMesosTaskHolder) TimeUnit(java.util.concurrent.TimeUnit) UsageManager(com.hubspot.singularity.data.UsageManager) SingularityTaskRequest(com.hubspot.singularity.SingularityTaskRequest) List(java.util.List) MesosUtils(com.hubspot.singularity.helpers.MesosUtils) RequestUtilization(com.hubspot.singularity.RequestUtilization) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) SingularitySlaveUsageWithId(com.hubspot.singularity.SingularitySlaveUsageWithId) MesosConfiguration(com.hubspot.singularity.config.MesosConfiguration) SingularityLeaderCache(com.hubspot.singularity.scheduler.SingularityLeaderCache) RequestUtilization(com.hubspot.singularity.RequestUtilization) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) SingularityPendingTaskId(com.hubspot.singularity.SingularityPendingTaskId) SingularityMesosTaskHolder(com.hubspot.singularity.helpers.SingularityMesosTaskHolder) AtomicReference(java.util.concurrent.atomic.AtomicReference) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Example 2 with TaskID

use of org.apache.mesos.v1.Protos.TaskID in project Singularity by HubSpot.

the class SingularityOfferPerformanceTestRunner method testSchedulerPerformance.

@Test(timeout = 600000L)
@Ignore
public void testSchedulerPerformance() {
    long start = System.currentTimeMillis();
    int numRequests = 4500;
    int numOffers = 2500;
    Random r = new Random();
    Iterator<Double> cpuIterator = r.doubles(1, 5).iterator();
    Iterator<Double> memoryIterator = r.doubles(15, 20000).iterator();
    Iterator<Double> diskIterator = r.doubles(30, 50000).iterator();
    for (int i = 0; i < numRequests; i++) {
        SingularityRequestBuilder bldr = new SingularityRequestBuilder("request-" + i, RequestType.SERVICE);
        bldr.setInstances(Optional.of(5));
        bldr.setSlavePlacement(Optional.of(SlavePlacement.GREEDY));
        SingularityRequest request = bldr.build();
        saveRequest(request);
        deployRequest(request, cpuIterator.next(), memoryIterator.next());
    }
    List<Offer> offers = new ArrayList<>(numOffers);
    for (int i = 0; i < numOffers; i++) {
        offers.add(createOffer(cpuIterator.next(), memoryIterator.next(), diskIterator.next(), "slave-" + i, "host-" + i));
    }
    System.out.println("Starting after " + JavaUtils.duration(start));
    start = System.currentTimeMillis();
    sms.resourceOffers(offers);
    final long duration = System.currentTimeMillis() - start;
    int activeTasks = taskManager.getActiveTaskIds().size();
    System.out.println(String.format("Launched %s tasks on %s offers in %s", activeTasks, numOffers, JavaUtils.durationFromMillis(duration)));
    start = System.currentTimeMillis();
    ExecutorService statusUpadtes = Executors.newFixedThreadPool(100);
    CompletableFuture<Void>[] updateFutures = new CompletableFuture[taskManager.getActiveTaskIds().size() * 5];
    AtomicInteger i = new AtomicInteger(0);
    for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) {
        updateFutures[i.getAndIncrement()] = CompletableFuture.supplyAsync(() -> {
            statusUpdate(taskManager.getTask(taskId).get(), TaskState.TASK_STAGING);
            return null;
        }, statusUpadtes);
        updateFutures[i.getAndIncrement()] = CompletableFuture.supplyAsync(() -> {
            statusUpdate(taskManager.getTask(taskId).get(), TaskState.TASK_STARTING);
            return null;
        }, statusUpadtes);
        updateFutures[i.getAndIncrement()] = CompletableFuture.supplyAsync(() -> {
            statusUpdate(taskManager.getTask(taskId).get(), TaskState.TASK_RUNNING);
            return null;
        }, statusUpadtes);
        updateFutures[i.getAndIncrement()] = CompletableFuture.supplyAsync(() -> {
            statusUpdate(taskManager.getTask(taskId).get(), TaskState.TASK_FAILED);
            return null;
        }, statusUpadtes);
        updateFutures[i.getAndIncrement()] = CompletableFuture.supplyAsync(() -> {
            statusUpdate(taskManager.getTask(taskId).get(), TaskState.TASK_FAILED);
            return null;
        }, statusUpadtes);
    }
    CompletableFuture.allOf(updateFutures).join();
    System.out.println(String.format("Ran %s status updates in %s", activeTasks * 5, JavaUtils.durationFromMillis(System.currentTimeMillis() - start)));
}
Also used : SingularityRequestBuilder(com.hubspot.singularity.SingularityRequestBuilder) SingularityRequest(com.hubspot.singularity.SingularityRequest) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) Random(java.util.Random) Offer(org.apache.mesos.v1.Protos.Offer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutorService(java.util.concurrent.ExecutorService) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with TaskID

use of org.apache.mesos.v1.Protos.TaskID in project Singularity by HubSpot.

the class SingularityMesosOfferSchedulerTest method itAccountsForMaxHistoricalTaskUsage.

@Test
public void itAccountsForMaxHistoricalTaskUsage() {
    try {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.PROBABLE_MAX_USAGE);
        initRequest();
        double cpuReserved = 2;
        double memMbReserved = 1000;
        initFirstDeployWithResources(cpuReserved, memMbReserved);
        saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(1)));
        resourceOffers(3);
        SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0);
        String t1 = taskId.getId();
        // 2 cpus used
        MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 5, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u1));
        usagePoller.runActionOnPoll();
        // 1 cpus used
        MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 11, TimeUnit.MILLISECONDS.toSeconds(taskId.getStartedAt()) + 6, 1000);
        mesosClient.setSlaveResourceUsage("host1", Collections.singletonList(t1u2));
        usagePoller.runActionOnPoll();
        Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
        longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0.1);
        longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0.1);
        SingularitySlaveUsage smallUsage = new SingularitySlaveUsage(0.1, 0.1, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), longRunningTasksUsage, 1, System.currentTimeMillis(), 1, 30000, 10, 0, 0, 0, 0, 107374182);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host1", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host2", smallUsage);
        usageManager.saveSpecificSlaveUsageAndSetCurrent("host3", smallUsage);
        requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), SingularityUser.DEFAULT_USER);
        Assert.assertEquals(3.0, usageManager.getRequestUtilizations().get(requestId).getCpuUsed(), 0.001);
        Offer host2Offer = createOffer(6, 30000, 107374182, "host2", "host2");
        slaveAndRackManager.checkOffer(host2Offer);
        Offer host3Offer = createOffer(6, 30000, 107374182, "host3", "host3");
        slaveAndRackManager.checkOffer(host3Offer);
        Collection<SingularityOfferHolder> offerHolders = offerScheduler.checkOffers(Arrays.asList(host2Offer, host3Offer));
        Assert.assertEquals(2, offerHolders.size());
        // A single offer should only ever get a single task even though both have room for both tasks here. Adding a task should reduce the score for the next check
        for (SingularityOfferHolder offerHolder : offerHolders) {
            Assert.assertEquals(1, offerHolder.getAcceptedTasks().size());
        }
    } finally {
        configuration.getMesosConfiguration().setScoringStrategy(SingularityUsageScoringStrategy.SPREAD_TASK_USAGE);
    }
}
Also used : SingularitySlaveUsage(com.hubspot.singularity.SingularitySlaveUsage) HashMap(java.util.HashMap) MesosTaskMonitorObject(com.hubspot.mesos.json.MesosTaskMonitorObject) Offer(org.apache.mesos.v1.Protos.Offer) SingularityScaleRequest(com.hubspot.singularity.api.SingularityScaleRequest) SingularityTaskId(com.hubspot.singularity.SingularityTaskId) ResourceUsageType(com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType) Test(org.junit.Test)

Example 4 with TaskID

use of org.apache.mesos.v1.Protos.TaskID in project Singularity by HubSpot.

the class SingularityMesosSchedulerClient method acknowledge.

/**
 * Sent by the scheduler to acknowledge a status update. Note that with the new API, schedulers are responsible
 * for explicitly acknowledging the receipt of status updates that have status.uuid set. These status updates
 * are retried until they are acknowledged by the scheduler. The scheduler must not acknowledge status updates
 * that do not have status.uuid set, as they are not retried. The uuid field contains raw bytes encoded in Base64.
 *
 * @param agentId
 * @param taskId
 * @param uuid
 */
public void acknowledge(AgentID agentId, TaskID taskId, ByteString uuid) {
    Builder acknowledge = build().setAcknowledge(Acknowledge.newBuilder().setAgentId(agentId).setTaskId(taskId).setUuid(uuid));
    sendCall(acknowledge, Type.ACKNOWLEDGE);
}
Also used : MesosClientBuilder(com.mesosphere.mesos.rx.java.MesosClientBuilder) ProtobufMesosClientBuilder(com.mesosphere.mesos.rx.java.protobuf.ProtobufMesosClientBuilder) Builder(org.apache.mesos.v1.scheduler.Protos.Call.Builder)

Example 5 with TaskID

use of org.apache.mesos.v1.Protos.TaskID in project Singularity by HubSpot.

the class SingularityMesosSchedulerClient method kill.

/**
 * Sent by the scheduler to kill a specific task. If the scheduler has a custom executor, the kill is
 * forwarded to the executor; it is up to the executor to kill the task and send a TASK_KILLED
 * (or TASK_FAILED) update. If the task hasn’t yet been delivered to the executor when Mesos master or
 * agent receives the kill request, a TASK_KILLED is generated and the task launch is not forwarded to
 * the executor. Note that if the task belongs to a task group, killing of one task results in all tasks
 * in the task group being killed. Mesos releases the resources for a task once it receives a terminal
 * update for the task. If the task is unknown to the master, a TASK_LOST will be generated.
 *
 * @param taskId
 */
public void kill(TaskID taskId) {
    Builder kill = build().setKill(Kill.newBuilder().setTaskId(taskId));
    sendCall(kill, Type.KILL);
}
Also used : MesosClientBuilder(com.mesosphere.mesos.rx.java.MesosClientBuilder) ProtobufMesosClientBuilder(com.mesosphere.mesos.rx.java.protobuf.ProtobufMesosClientBuilder) Builder(org.apache.mesos.v1.scheduler.Protos.Call.Builder)

Aggregations

SingularityTaskId (com.hubspot.singularity.SingularityTaskId)6 MesosClientBuilder (com.mesosphere.mesos.rx.java.MesosClientBuilder)5 ProtobufMesosClientBuilder (com.mesosphere.mesos.rx.java.protobuf.ProtobufMesosClientBuilder)5 ArrayList (java.util.ArrayList)5 Offer (org.apache.mesos.v1.Protos.Offer)5 Builder (org.apache.mesos.v1.scheduler.Protos.Call.Builder)5 ByteString (com.google.protobuf.ByteString)4 HashMap (java.util.HashMap)4 Resources (com.hubspot.mesos.Resources)3 SingularitySlaveUsage (com.hubspot.singularity.SingularitySlaveUsage)3 ResourceUsageType (com.hubspot.singularity.SingularitySlaveUsage.ResourceUsageType)3 SingularityTask (com.hubspot.singularity.SingularityTask)3 SingularityTaskRequest (com.hubspot.singularity.SingularityTaskRequest)3 Optional (com.google.common.base.Optional)2 Inject (com.google.inject.Inject)2 SingularityContainerInfo (com.hubspot.mesos.SingularityContainerInfo)2 SingularityDockerInfo (com.hubspot.mesos.SingularityDockerInfo)2 SingularityDockerParameter (com.hubspot.mesos.SingularityDockerParameter)2 SingularityDockerPortMapping (com.hubspot.mesos.SingularityDockerPortMapping)2 SingularityDockerVolume (com.hubspot.mesos.SingularityDockerVolume)2