
Example 6 with MachineState

Use of com.hubspot.singularity.MachineState in project Singularity by HubSpot.

From the class SingularitySlaveAndRackManager, method doesOfferMatch:

SlaveMatchState doesOfferMatch(SingularityOfferHolder offerHolder, SingularityTaskRequest taskRequest, List<SingularityTaskId> activeTaskIdsForRequest) {
    final String host = offerHolder.getHostname();
    final String rackId = offerHolder.getRackId();
    final String slaveId = offerHolder.getSlaveId();
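    // Check the slave's current state first: frozen or decommissioning slaves do not receive new tasks.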
    final MachineState currentSlaveState = slaveManager.getSlave(slaveId).get().getCurrentState().getState();
    if (currentSlaveState == MachineState.FROZEN) {
        return SlaveMatchState.SLAVE_FROZEN;
    }
    if (currentSlaveState.isDecommissioning()) {
        return SlaveMatchState.SLAVE_DECOMMISSIONING;
    }
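    // Apply the same frozen/decommissioning checks to the slave's rack.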
    final MachineState currentRackState = rackManager.getRack(rackId).get().getCurrentState().getState();
    if (currentRackState == MachineState.FROZEN) {
        return SlaveMatchState.RACK_FROZEN;
    }
    if (currentRackState.isDecommissioning()) {
        return SlaveMatchState.RACK_DECOMMISSIONING;
    }
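    // If the request declares rack affinity, the offer must come from one of the allowed racks.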
    if (!taskRequest.getRequest().getRackAffinity().or(Collections.emptyList()).isEmpty()) {
        if (!taskRequest.getRequest().getRackAffinity().get().contains(rackId)) {
            LOG.trace("Task {} requires a rack in {} (current rack {})", taskRequest.getPendingTask().getPendingTaskId(), taskRequest.getRequest().getRackAffinity().get(), rackId);
            return SlaveMatchState.RACK_AFFINITY_NOT_MATCHING;
        }
    }
    if (!isSlaveAttributesMatch(offerHolder, taskRequest)) {
        return SlaveMatchState.SLAVE_ATTRIBUTES_DO_NOT_MATCH;
    }
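    // Resolve the placement strategy for this request, falling back to the configured default.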
    final SlavePlacement slavePlacement = taskRequest.getRequest().getSlavePlacement().or(configuration.getDefaultSlavePlacement());
    if (!taskRequest.getRequest().isRackSensitive() && slavePlacement == SlavePlacement.GREEDY) {
        // todo: account for this or let this behavior continue?
        return SlaveMatchState.NOT_RACK_OR_SLAVE_PARTICULAR;
    }
    final int numDesiredInstances = taskRequest.getRequest().getInstancesSafe();
    boolean allowBounceToSameHost = isAllowBounceToSameHost(taskRequest.getRequest());
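    // Counters for tasks already on this slave: running, cleaning, launched by the same bounce, and from other deploys.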
    Multiset<String> countPerRack = HashMultiset.create(slaveManager.getNumActive());
    double numOnSlave = 0;
    double numCleaningOnSlave = 0;
    double numFromSameBounceOnSlave = 0;
    double numOtherDeploysOnSlave = 0;
    boolean taskLaunchedFromBounceWithActionId = taskRequest.getPendingTask().getPendingTaskId().getPendingType() == PendingType.BOUNCE && taskRequest.getPendingTask().getActionId().isPresent();
    final String sanitizedHost = offerHolder.getSanitizedHost();
    final String sanitizedRackId = offerHolder.getSanitizedRackId();
    Collection<SingularityTaskId> cleaningTasks = leaderCache.getCleanupTaskIds();
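    // Walk the active tasks for this request, building per-rack counts and tallying the tasks already on this slave.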
    for (SingularityTaskId taskId : activeTaskIdsForRequest) {
        if (!cleaningTasks.contains(taskId) && taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
            countPerRack.add(taskId.getSanitizedRackId());
        }
        if (!taskId.getSanitizedHost().equals(sanitizedHost)) {
            continue;
        }
        if (taskRequest.getDeploy().getId().equals(taskId.getDeployId())) {
            if (cleaningTasks.contains(taskId)) {
                numCleaningOnSlave++;
            } else {
                numOnSlave++;
            }
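            // When this launch comes from a bounce with an actionId, check whether the task already on this slave
            // was started by the same bounce; missing task data falls back to the stricter placement rules.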
            if (taskLaunchedFromBounceWithActionId) {
                Optional<SingularityTask> maybeTask = taskManager.getTask(taskId);
                boolean errorInTaskData = false;
                if (maybeTask.isPresent()) {
                    SingularityPendingTask pendingTask = maybeTask.get().getTaskRequest().getPendingTask();
                    if (pendingTask.getPendingTaskId().getPendingType() == PendingType.BOUNCE) {
                        if (pendingTask.getActionId().isPresent()) {
                            if (pendingTask.getActionId().get().equals(taskRequest.getPendingTask().getActionId().get())) {
                                numFromSameBounceOnSlave++;
                            }
                        } else {
                            // No actionId present on bounce, fall back to more restrictive placement strategy
                            errorInTaskData = true;
                        }
                    }
                } else {
                    // Could not find appropriate task data, fall back to more restrictive placement strategy
                    errorInTaskData = true;
                }
                if (errorInTaskData) {
                    allowBounceToSameHost = false;
                }
            }
        } else {
            numOtherDeploysOnSlave++;
        }
    }
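    // Rack-sensitive requests must spread instances across racks; reject the offer if this rack is already saturated.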
    if (taskRequest.getRequest().isRackSensitive()) {
        final boolean isRackOk = isRackOk(countPerRack, sanitizedRackId, numDesiredInstances, taskRequest.getRequest().getId(), slaveId, host, numCleaningOnSlave, leaderCache);
        if (!isRackOk) {
            return SlaveMatchState.RACK_SATURATED;
        }
    }
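    // Enforce per-slave saturation rules according to the placement strategy.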
    switch(slavePlacement) {
        case SEPARATE:
        case SEPARATE_BY_DEPLOY:
        case SPREAD_ALL_SLAVES:
            if (allowBounceToSameHost && taskLaunchedFromBounceWithActionId) {
                if (numFromSameBounceOnSlave > 0) {
                    LOG.trace("Rejecting SEPARATE task {} from slave {} ({}) due to numFromSameBounceOnSlave {}", taskRequest.getRequest().getId(), slaveId, host, numFromSameBounceOnSlave);
                    return SlaveMatchState.SLAVE_SATURATED;
                }
            } else {
                if (numOnSlave > 0 || numCleaningOnSlave > 0) {
                    LOG.trace("Rejecting {} task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {}", slavePlacement.name(), taskRequest.getRequest().getId(), slaveId, host, numOnSlave, numCleaningOnSlave);
                    return SlaveMatchState.SLAVE_SATURATED;
                }
            }
            break;
        case SEPARATE_BY_REQUEST:
            if (numOnSlave > 0 || numCleaningOnSlave > 0 || numOtherDeploysOnSlave > 0) {
                LOG.trace("Rejecting SEPARATE_BY_REQUEST task {} from slave {} ({}) due to numOnSlave {} numCleaningOnSlave {} numOtherDeploysOnSlave {}", taskRequest.getRequest().getId(), slaveId, host, numOnSlave, numCleaningOnSlave, numOtherDeploysOnSlave);
                return SlaveMatchState.SLAVE_SATURATED;
            }
            break;
        case OPTIMISTIC:
            // If no tasks are active for this request yet, we can fall back to greedy.
            if (activeTaskIdsForRequest.size() > 0) {
                Collection<SingularityPendingTaskId> pendingTasksForRequestClusterwide = leaderCache.getPendingTaskIdsForRequest(taskRequest.getRequest().getId());
                Set<String> currentHostsForRequest = activeTaskIdsForRequest.stream().map(SingularityTaskId::getSanitizedHost).collect(Collectors.toSet());
                final double numPerSlave = activeTaskIdsForRequest.size() / (double) currentHostsForRequest.size();
                final double leniencyCoefficient = configuration.getPlacementLeniency();
                final double threshold = numPerSlave * (1 + (pendingTasksForRequestClusterwide.size() * leniencyCoefficient));
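                // Illustrative numbers (not actual config values): 6 active tasks across 3 hosts gives numPerSlave = 2;
                // with 2 pending tasks and a leniency coefficient of 0.5, the threshold is 2 * (1 + 2 * 0.5) = 4.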
                final boolean isSlaveOk = numOnSlave <= threshold;
                if (!isSlaveOk) {
                    LOG.trace("Rejecting OPTIMISTIC task {} from slave {} ({}) because numOnSlave {} violates threshold {} (based on active tasks for request {}, current hosts for request {}, pending tasks for request {})", taskRequest.getRequest().getId(), slaveId, host, numOnSlave, threshold, activeTaskIdsForRequest.size(), currentHostsForRequest.size(), pendingTasksForRequestClusterwide.size());
                    return SlaveMatchState.SLAVE_SATURATED;
                }
            }
            break;
        case GREEDY:
    }
    return SlaveMatchState.OK;
}
Also used: SingularityPendingTaskId (com.hubspot.singularity.SingularityPendingTaskId), SingularityTask (com.hubspot.singularity.SingularityTask), SingularityPendingTask (com.hubspot.singularity.SingularityPendingTask), SlavePlacement (com.hubspot.singularity.SlavePlacement), SingularityTaskId (com.hubspot.singularity.SingularityTaskId), MachineState (com.hubspot.singularity.MachineState)

Aggregations

MachineState (com.hubspot.singularity.MachineState): 6
SingularitySlave (com.hubspot.singularity.SingularitySlave): 2
SingularityTask (com.hubspot.singularity.SingularityTask): 2
SingularityTaskId (com.hubspot.singularity.SingularityTaskId): 2
SingularityExpiringMachineState (com.hubspot.singularity.expiring.SingularityExpiringMachineState): 2
Timed (com.codahale.metrics.annotation.Timed): 1
Optional (com.google.common.base.Optional): 1
SingularityPendingRequest (com.hubspot.singularity.SingularityPendingRequest): 1
SingularityPendingTask (com.hubspot.singularity.SingularityPendingTask): 1
SingularityPendingTaskId (com.hubspot.singularity.SingularityPendingTaskId): 1
SingularityRack (com.hubspot.singularity.SingularityRack): 1
SlavePlacement (com.hubspot.singularity.SlavePlacement): 1
SingularityMachineChangeRequest (com.hubspot.singularity.api.SingularityMachineChangeRequest): 1