Search in sources :

Example 1 with InternalNodeInfo

use of com.facebook.presto.execution.scheduler.InternalNodeInfo in project presto by prestodb.

the class DynamicBucketNodeMap method assignOrUpdateBucketToNode.

/**
 * Stores {@code node}, together with its {@code cacheable} flag, under {@code bucketedId}
 * in {@code bucketToNodeInfo}.
 *
 * @param bucketedId bucket index; must satisfy {@code 0 <= bucketedId < bucketCount}
 * @param node node to record for the bucket; must not be null
 * @param cacheable flag wrapped with the node in the stored {@link InternalNodeInfo}
 */
@Override
public void assignOrUpdateBucketToNode(int bucketedId, InternalNode node, boolean cacheable) {
    // Report the offending value and the bound so a failed check is diagnosable from the message alone.
    checkArgument(bucketedId >= 0 && bucketedId < bucketCount, "bucketedId %s is out of range [0, %s)", bucketedId, bucketCount);
    requireNonNull(node, "node is null");
    bucketToNodeInfo.put(bucketedId, new InternalNodeInfo(node, cacheable));
}
Also used : InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo)

Example 2 with InternalNodeInfo

use of com.facebook.presto.execution.scheduler.InternalNodeInfo in project presto by prestodb.

the class SimpleNodeSelector method computeAssignments.

/**
 * Chooses a node for every split in {@code splits} and reports how the caller should wait
 * for the splits that could not be placed.
 *
 * <p>Candidate nodes depend on the split's node selection strategy:
 * <ul>
 *   <li>{@code HARD_AFFINITY}: only the nodes returned by {@code selectExactNodes} for the split's preferred nodes;</li>
 *   <li>{@code SOFT_AFFINITY}: the preferred nodes first, followed by nodes picked by the random selection;</li>
 *   <li>{@code NO_PREFERENCE}: nodes picked by the random selection only.</li>
 * </ul>
 * A node is chosen with {@code chooseLeastBusyNode}, first against the total split weight limit
 * ({@code maxSplitsWeightPerNode}) and, if that yields nothing, against the queued weight limit
 * ({@code maxPendingSplitsWeightPerTask}).
 *
 * @param splits splits to place
 * @param existingTasks tasks passed to the assignment statistics, the eligible-node lookup and the blocked future
 * @return the assignments made plus a future created by {@code toWhenHasSplitQueueSpaceFuture}
 * @throws PrestoException with {@code NODE_SELECTION_NOT_SUPPORTED} for an unknown strategy, or
 *         {@code NO_NODES_AVAILABLE} when a split has no candidate nodes
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    List<InternalNode> eligibleNodes = getEligibleNodes(maxTasksPerStage, nodeMap, existingTasks);
    NodeSelection randomNodeSelection = new RandomNodeSelection(eligibleNodes, minCandidates);
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // Kept separate from the soft-affinity provider so that a SOFT_AFFINITY split never changes
    // which provider later HARD_AFFINITY splits in the same batch are resolved against.
    NodeProvider activeNodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
    // Built on first use and then reused for every SOFT_AFFINITY split in this call.
    NodeProvider softAffinityNodeProvider = null;
    for (Split split : splits) {
        // Scoped per split: a count left over from a previous affinity split must not make the
        // candidates of a NO_PREFERENCE split look preferred.
        OptionalInt preferredNodeCount = OptionalInt.empty();
        List<InternalNode> candidateNodes;
        switch (split.getNodeSelectionStrategy()) {
            case HARD_AFFINITY:
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(activeNodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                break;
            case SOFT_AFFINITY:
                if (softAffinityNodeProvider == null) {
                    // Using all nodes for soft affinity scheduling with modular hashing because otherwise temporarily down nodes would trigger too much rehashing
                    softAffinityNodeProvider = (nodeSelectionHashStrategy == MODULAR_HASHING)
                            ? new ModularHashingNodeProvider(nodeMap.getAllNodes())
                            : activeNodeProvider;
                }
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(softAffinityNodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                // Preferred nodes stay at the front; randomly picked nodes follow as fallback candidates.
                candidateNodes = ImmutableList.<InternalNode>builder().addAll(candidateNodes).addAll(randomNodeSelection.pickNodes(split)).build();
                break;
            case NO_PREFERENCE:
                candidateNodes = randomNodeSelection.pickNodes(split);
                break;
            default:
                throw new PrestoException(NODE_SELECTION_NOT_SUPPORTED, format("Unsupported node selection strategy %s", split.getNodeSelectionStrategy()));
        }
        if (candidateNodes.isEmpty()) {
            log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
            throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }
        SplitWeight splitWeight = split.getSplitWeight();
        Optional<InternalNodeInfo> chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getTotalSplitsWeight, preferredNodeCount, maxSplitsWeightPerNode, assignmentStats);
        if (!chosenNodeInfo.isPresent()) {
            // Second attempt: compare against the queued weight for the stage instead of the total weight.
            chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getQueuedSplitsWeightForStage, preferredNodeCount, maxPendingSplitsWeightPerTask, assignmentStats);
        }
        if (chosenNodeInfo.isPresent()) {
            // Rebuild the split so its context carries the cacheable flag of the chosen node.
            split = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(chosenNodeInfo.get().isCacheable()));
            InternalNode chosenNode = chosenNodeInfo.get().getInternalNode();
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
        }
        else if (split.getNodeSelectionStrategy() != HARD_AFFINITY) {
            splitWaitingForAnyNode = true;
        }
        else if (!splitWaitingForAnyNode) {
            // Exact node set won't matter, if a split is waiting for any node
            blockedExactNodes.addAll(candidateNodes);
        }
    }
    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : NodeAssignmentStats(com.facebook.presto.execution.scheduler.NodeAssignmentStats) InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo) PrestoException(com.facebook.presto.spi.PrestoException) NodeProvider(com.facebook.presto.spi.NodeProvider) ModularHashingNodeProvider(com.facebook.presto.execution.scheduler.ModularHashingNodeProvider) OptionalInt(java.util.OptionalInt) SplitWeight(com.facebook.presto.spi.SplitWeight) SplitContext(com.facebook.presto.spi.SplitContext) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) InternalNode(com.facebook.presto.metadata.InternalNode) Split(com.facebook.presto.metadata.Split) SplitPlacementResult(com.facebook.presto.execution.scheduler.SplitPlacementResult) Sets.newHashSet(com.google.common.collect.Sets.newHashSet) HashSet(java.util.HashSet)

Example 3 with InternalNodeInfo

use of com.facebook.presto.execution.scheduler.InternalNodeInfo in project presto by prestodb.

the class SimpleTtlNodeSelector method computeAssignments.

/**
 * Places splits whose node selection strategy is {@code NO_PREFERENCE}; any batch containing a
 * split with a different strategy is handed to {@code simpleNodeSelector} unchanged.
 *
 * <p>For each split, candidates come from the random node selection and the node is chosen through
 * {@code simpleNodeSelector.chooseLeastBusyNode}: first against {@code maxSplitsWeightPerNode} using
 * total split weight, then against {@code maxPendingSplitsWeightPerTask} using queued weight for the stage.
 *
 * @param splits splits to place
 * @param existingTasks tasks passed to the assignment statistics, the eligible-node lookup and the blocked future
 * @return the assignments made; the future is already complete unless some split found no node
 * @throws PrestoException with {@code NO_NODES_AVAILABLE} when the random selection returns no candidates
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    // Current NodeSelectionStrategy support is limited to NO_PREFERENCE
    boolean hasUnsupportedStrategy = splits.stream()
            .anyMatch(candidate -> candidate.getNodeSelectionStrategy() != NodeSelectionStrategy.NO_PREFERENCE);
    if (hasUnsupportedStrategy) {
        return simpleNodeSelector.computeAssignments(splits, existingTasks);
    }

    ImmutableMultimap.Builder<InternalNode, Split> placements = ImmutableMultimap.builder();
    NodeMap currentNodeMap = this.nodeMap.get().get();
    NodeAssignmentStats stats = new NodeAssignmentStats(nodeTaskMap, currentNodeMap, existingTasks);
    List<InternalNode> eligible = getEligibleNodes(maxTasksPerStage, currentNodeMap, existingTasks);
    NodeSelection randomSelection = new RandomNodeSelection(eligible, minCandidates);
    // No preferred nodes exist for NO_PREFERENCE splits, so the count stays empty throughout.
    OptionalInt noPreferredNodes = OptionalInt.empty();
    boolean anySplitUnplaced = false;

    for (Split split : splits) {
        // Defensive re-check of the precondition established above.
        if (split.getNodeSelectionStrategy() != NodeSelectionStrategy.NO_PREFERENCE) {
            throw new PrestoException(NODE_SELECTION_NOT_SUPPORTED, format("Unsupported node selection strategy for TTL scheduling: %s", split.getNodeSelectionStrategy()));
        }

        List<InternalNode> candidates = randomSelection.pickNodes(split);
        if (candidates.isEmpty()) {
            log.warn("No nodes available to schedule %s. Available nodes %s", split, currentNodeMap.getActiveNodes());
            throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }

        SplitWeight weight = split.getSplitWeight();
        Optional<InternalNodeInfo> selection = simpleNodeSelector.chooseLeastBusyNode(weight, candidates, stats::getTotalSplitsWeight, noPreferredNodes, maxSplitsWeightPerNode, stats);
        if (!selection.isPresent()) {
            // Second attempt: compare against the queued weight for the stage instead of the total weight.
            selection = simpleNodeSelector.chooseLeastBusyNode(weight, candidates, stats::getQueuedSplitsWeightForStage, noPreferredNodes, maxPendingSplitsWeightPerTask, stats);
        }

        if (!selection.isPresent()) {
            anySplitUnplaced = true;
            continue;
        }

        InternalNodeInfo nodeInfo = selection.get();
        // Rebuild the split so its context carries the cacheable flag of the chosen node.
        Split placedSplit = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(nodeInfo.isCacheable()));
        InternalNode target = nodeInfo.getInternalNode();
        placements.put(target, placedSplit);
        stats.addAssignedSplit(target, weight);
    }

    ListenableFuture<?> blocked;
    if (anySplitUnplaced) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    else {
        blocked = immediateFuture(null);
    }
    return new SplitPlacementResult(blocked, placements.build());
}
Also used : NodeAssignmentStats(com.facebook.presto.execution.scheduler.NodeAssignmentStats) InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) SplitWeight(com.facebook.presto.spi.SplitWeight) SplitContext(com.facebook.presto.spi.SplitContext) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) InternalNode(com.facebook.presto.metadata.InternalNode) Split(com.facebook.presto.metadata.Split) SplitPlacementResult(com.facebook.presto.execution.scheduler.SplitPlacementResult)

Example 4 with InternalNodeInfo

use of com.facebook.presto.execution.scheduler.InternalNodeInfo in project presto by prestodb.

the class SimpleNodeSelector method chooseLeastBusyNode.

/**
 * Picks a node for a split from {@code candidateNodes}.
 *
 * <p>Candidates are visited in list order. Nodes whose status is {@code DEAD}, or whose unacknowledged
 * split count for the stage has reached {@code maxUnacknowledgedSplitsPerTask}, are skipped. The first
 * assignable candidate positioned below {@code preferredNodeCount} is returned immediately with the
 * cacheable flag set; otherwise the assignable candidate with the lowest current weight is returned
 * with the cacheable flag cleared.
 *
 * @param splitWeight weight of the split being placed
 * @param candidateNodes nodes to consider; when a preferred count is given, the preferred nodes occupy the leading positions
 * @param splitWeightProvider supplies the current weight of a node
 * @param preferredNodeCount number of leading candidates treated as preferred, or empty for none
 * @param maxSplitsWeight weight limit passed to {@code canAssignSplitBasedOnWeight}
 * @param assignmentStats source of the unacknowledged split count per node
 * @return the chosen node, or empty when no candidate can accept the split
 */
protected Optional<InternalNodeInfo> chooseLeastBusyNode(SplitWeight splitWeight, List<InternalNode> candidateNodes, ToLongFunction<InternalNode> splitWeightProvider, OptionalInt preferredNodeCount, long maxSplitsWeight, NodeAssignmentStats assignmentStats) {
    // Positions below this bound are preferred; zero means no position qualifies.
    int preferredBound = preferredNodeCount.orElse(0);
    InternalNode leastBusy = null;
    long lowestWeight = Long.MAX_VALUE;

    for (int position = 0; position < candidateNodes.size(); position++) {
        InternalNode candidate = candidateNodes.get(position);
        boolean preferred = position < preferredBound;

        if (candidate.getNodeStatus() == DEAD) {
            // Node is down. Do not schedule split. Skip it.
            if (preferred) {
                nodeSelectionStats.incrementPreferredNonAliveNodeSkippedCount();
            }
            continue;
        }
        if (assignmentStats.getUnacknowledgedSplitCountForStage(candidate) >= maxUnacknowledgedSplitsPerTask) {
            continue;
        }

        long candidateWeight = splitWeightProvider.applyAsLong(candidate);
        if (!canAssignSplitBasedOnWeight(candidateWeight, maxSplitsWeight, splitWeight)) {
            continue;
        }

        // choose the preferred node first as long as they're not busy
        if (preferred) {
            if (position == 0) {
                nodeSelectionStats.incrementPrimaryPreferredNodeSelectedCount();
            }
            else {
                nodeSelectionStats.incrementNonPrimaryPreferredNodeSelectedCount();
            }
            return Optional.of(new InternalNodeInfo(candidate, true));
        }

        // fallback to choosing the least busy nodes
        if (candidateWeight < lowestWeight) {
            leastBusy = candidate;
            lowestWeight = candidateWeight;
        }
    }

    if (leastBusy != null) {
        nodeSelectionStats.incrementNonPreferredNodeSelectedCount();
        return Optional.of(new InternalNodeInfo(leastBusy, false));
    }
    return Optional.empty();
}
Also used : InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo) InternalNode(com.facebook.presto.metadata.InternalNode)

Aggregations

InternalNodeInfo (com.facebook.presto.execution.scheduler.InternalNodeInfo): 4, InternalNode (com.facebook.presto.metadata.InternalNode): 3, BucketNodeMap (com.facebook.presto.execution.scheduler.BucketNodeMap): 2, NodeAssignmentStats (com.facebook.presto.execution.scheduler.NodeAssignmentStats): 2, NodeMap (com.facebook.presto.execution.scheduler.NodeMap): 2, SplitPlacementResult (com.facebook.presto.execution.scheduler.SplitPlacementResult): 2, Split (com.facebook.presto.metadata.Split): 2, PrestoException (com.facebook.presto.spi.PrestoException): 2, SplitContext (com.facebook.presto.spi.SplitContext): 2, SplitWeight (com.facebook.presto.spi.SplitWeight): 2, OptionalInt (java.util.OptionalInt): 2, ModularHashingNodeProvider (com.facebook.presto.execution.scheduler.ModularHashingNodeProvider): 1, NodeProvider (com.facebook.presto.spi.NodeProvider): 1, ImmutableMultimap (com.google.common.collect.ImmutableMultimap): 1, Sets.newHashSet (com.google.common.collect.Sets.newHashSet): 1, HashSet (java.util.HashSet): 1