Search in sources :

Example 1 with ResettableRandomizedIterator

Use of com.facebook.presto.execution.scheduler.ResettableRandomizedIterator in project presto by prestodb.

From the class SimpleTtlNodeSelector, method getEligibleNodes.

/**
 * Builds the list of nodes that may be used, based on each node's earliest-expiring TTL entry.
 *
 * <p>Nodes that already host one of {@code existingTasks} are kept when they are still active,
 * have TTL information, and pass {@code isTtlEnough} for the estimated remaining execution time.
 * If fewer than {@code limit} such nodes exist, additional nodes are drawn through
 * {@code selectNodes} from the active nodes accepted by {@code filterNodesByTtl}.
 *
 * @param limit number of nodes below which extra nodes are selected; the returned list
 *        may be longer when more existing nodes than {@code limit} qualify
 * @param nodeMap snapshot providing the active nodes and the node-id lookup
 * @param existingTasks tasks whose hosting nodes are considered first
 * @return the retained existing nodes followed by any newly selected nodes; never contains nulls
 */
private List<InternalNode> getEligibleNodes(int limit, NodeMap nodeMap, List<RemoteTask> existingTasks) {
    // Reduce every node's TTL entries to the one that expires first (empty if the node has none)
    Comparator<ConfidenceBasedTtlInfo> byExpiry = Comparator.comparing(ConfidenceBasedTtlInfo::getExpiryInstant);
    Map<InternalNode, Optional<ConfidenceBasedTtlInfo>> earliestTtlByNode = nodeTtlFetcherManager.getAllTtls().entrySet().stream()
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> entry.getValue().getTtlInfo().stream().min(byExpiry)));
    Duration remainingTime = getEstimatedExecutionTimeRemaining();

    // Among the nodes that already run tasks, keep only those whose TTL is long enough
    List<InternalNode> selected = existingTasks.stream()
            .map(task -> nodeMap.getActiveNodesByNodeId().get(task.getNodeId()))
            .filter(node -> node != null && earliestTtlByNode.containsKey(node))
            .filter(node -> earliestTtlByNode.get(node)
                    .filter(ttl -> isTtlEnough(ttl, remainingTime))
                    .isPresent())
            .collect(toList());
    int retainedCount = selected.size();

    // Candidates for topping up: active nodes passing the TTL filter, excluding those already retained
    List<InternalNode> candidates = filterNodesByTtl(
            nodeMap.getActiveNodes(),
            ImmutableSet.copyOf(selected),
            earliestTtlByNode,
            remainingTime);
    if (retainedCount < limit && !candidates.isEmpty()) {
        selected.addAll(selectNodes(limit - retainedCount, new ResettableRandomizedIterator<>(candidates)));
    }

    verify(selected.stream().noneMatch(Objects::isNull), "existingNodes list must not contain any nulls");
    return selected;
}
Also used : NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) NodeScheduler.selectNodes(com.facebook.presto.execution.scheduler.NodeScheduler.selectNodes) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SplitContext(com.facebook.presto.spi.SplitContext) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) Map(java.util.Map) ConfidenceBasedTtlInfo(com.facebook.presto.spi.ttl.ConfidenceBasedTtlInfo) NODE_SELECTION_NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NODE_SELECTION_NOT_SUPPORTED) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) NO_NODES_AVAILABLE(com.facebook.presto.spi.StandardErrorCode.NO_NODES_AVAILABLE) Instant(java.time.Instant) String.format(java.lang.String.format) Objects(java.util.Objects) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NodeTtlFetcherManager(com.facebook.presto.ttl.nodettlfetchermanagers.NodeTtlFetcherManager) ResettableRandomizedIterator(com.facebook.presto.execution.scheduler.ResettableRandomizedIterator) NodeTtl(com.facebook.presto.spi.ttl.NodeTtl) Optional(java.util.Optional) NodeScheduler.calculateLowWatermark(com.facebook.presto.execution.scheduler.NodeScheduler.calculateLowWatermark) Logger(com.facebook.airlift.log.Logger) SplitPlacementResult(com.facebook.presto.execution.scheduler.SplitPlacementResult) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo) Supplier(com.google.common.base.Supplier) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) QueryManager(com.facebook.presto.execution.QueryManager) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
NodeScheduler.toWhenHasSplitQueueSpaceFuture(com.facebook.presto.execution.scheduler.NodeScheduler.toWhenHasSplitQueueSpaceFuture) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) SECONDS(java.time.temporal.ChronoUnit.SECONDS) Futures.immediateFuture(com.google.common.util.concurrent.Futures.immediateFuture) Session(com.facebook.presto.Session) NodeSelectionStrategy(com.facebook.presto.spi.schedule.NodeSelectionStrategy) NodeAssignmentStats(com.facebook.presto.execution.scheduler.NodeAssignmentStats) InternalNode(com.facebook.presto.metadata.InternalNode) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) RemoteTask(com.facebook.presto.execution.RemoteTask) Split(com.facebook.presto.metadata.Split) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SplitWeight(com.facebook.presto.spi.SplitWeight) Comparator(java.util.Comparator) Optional(java.util.Optional) Objects(java.util.Objects) NodeTtl(com.facebook.presto.spi.ttl.NodeTtl) Duration(io.airlift.units.Duration) InternalNode(com.facebook.presto.metadata.InternalNode) NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NodeMap(com.facebook.presto.execution.scheduler.NodeMap)

Example 2 with ResettableRandomizedIterator

Use of com.facebook.presto.execution.scheduler.ResettableRandomizedIterator in project presto by prestodb.

From the class SimpleTtlNodeSelector, method selectRandomNodes.

/**
 * Selects up to {@code limit} nodes at random from the active nodes that pass the TTL filter.
 *
 * <p>Each node is represented by its earliest-expiring TTL entry; {@code filterNodesByTtl}
 * decides, given the estimated remaining execution time and {@code excludedNodes}, which
 * active nodes are eligible.
 *
 * @param limit number of nodes requested from {@code selectNodes}
 * @param excludedNodes nodes passed to {@code filterNodesByTtl} for exclusion
 * @return the nodes returned by {@code selectNodes} over a randomized view of the eligible nodes
 */
@Override
public List<InternalNode> selectRandomNodes(int limit, Set<InternalNode> excludedNodes) {
    // Reduce every node's TTL entries to the one that expires first (empty if the node has none)
    Comparator<ConfidenceBasedTtlInfo> byExpiry = Comparator.comparing(ConfidenceBasedTtlInfo::getExpiryInstant);
    Map<InternalNode, Optional<ConfidenceBasedTtlInfo>> earliestTtlByNode = nodeTtlFetcherManager.getAllTtls().entrySet().stream()
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> entry.getValue().getTtlInfo().stream().min(byExpiry)));

    List<InternalNode> currentlyActive = this.nodeMap.get().get().getActiveNodes();
    List<InternalNode> candidates = filterNodesByTtl(
            currentlyActive,
            excludedNodes,
            earliestTtlByNode,
            getEstimatedExecutionTimeRemaining());
    return selectNodes(limit, new ResettableRandomizedIterator<>(candidates));
}
Also used : NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) NodeScheduler.selectNodes(com.facebook.presto.execution.scheduler.NodeScheduler.selectNodes) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SplitContext(com.facebook.presto.spi.SplitContext) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) Map(java.util.Map) ConfidenceBasedTtlInfo(com.facebook.presto.spi.ttl.ConfidenceBasedTtlInfo) NODE_SELECTION_NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NODE_SELECTION_NOT_SUPPORTED) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) NO_NODES_AVAILABLE(com.facebook.presto.spi.StandardErrorCode.NO_NODES_AVAILABLE) Instant(java.time.Instant) String.format(java.lang.String.format) Objects(java.util.Objects) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NodeTtlFetcherManager(com.facebook.presto.ttl.nodettlfetchermanagers.NodeTtlFetcherManager) ResettableRandomizedIterator(com.facebook.presto.execution.scheduler.ResettableRandomizedIterator) NodeTtl(com.facebook.presto.spi.ttl.NodeTtl) Optional(java.util.Optional) NodeScheduler.calculateLowWatermark(com.facebook.presto.execution.scheduler.NodeScheduler.calculateLowWatermark) Logger(com.facebook.airlift.log.Logger) SplitPlacementResult(com.facebook.presto.execution.scheduler.SplitPlacementResult) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNodeInfo(com.facebook.presto.execution.scheduler.InternalNodeInfo) Supplier(com.google.common.base.Supplier) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) QueryManager(com.facebook.presto.execution.QueryManager) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
NodeScheduler.toWhenHasSplitQueueSpaceFuture(com.facebook.presto.execution.scheduler.NodeScheduler.toWhenHasSplitQueueSpaceFuture) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) SECONDS(java.time.temporal.ChronoUnit.SECONDS) Futures.immediateFuture(com.google.common.util.concurrent.Futures.immediateFuture) Session(com.facebook.presto.Session) NodeSelectionStrategy(com.facebook.presto.spi.schedule.NodeSelectionStrategy) NodeAssignmentStats(com.facebook.presto.execution.scheduler.NodeAssignmentStats) InternalNode(com.facebook.presto.metadata.InternalNode) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) RemoteTask(com.facebook.presto.execution.RemoteTask) Split(com.facebook.presto.metadata.Split) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SplitWeight(com.facebook.presto.spi.SplitWeight) Comparator(java.util.Comparator) Optional(java.util.Optional) NodeTtl(com.facebook.presto.spi.ttl.NodeTtl) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) Duration(io.airlift.units.Duration) InternalNode(com.facebook.presto.metadata.InternalNode) NodeTaskMap(com.facebook.presto.execution.NodeTaskMap) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) NodeMap(com.facebook.presto.execution.scheduler.NodeMap)

Example 3 with ResettableRandomizedIterator

Use of com.facebook.presto.execution.scheduler.ResettableRandomizedIterator in project presto by prestodb.

From the class TopologyAwareNodeSelector, method computeAssignments.

/**
 * Assigns each split to a node, trying the split's preferred network locations from the
 * most specific depth up to the root location.
 *
 * <p>Splits with {@code HARD_AFFINITY} are only placed on the nodes returned by
 * {@code selectExactNodes}. All other splits are placed via {@code bestNodeSplitCount} over
 * the workers at progressively shallower network locations. Splits for which no node is
 * chosen are left out of the returned assignment.
 *
 * @param splits the splits to place
 * @param existingTasks tasks used to seed the assignment statistics and to build the blocked future
 * @return the assignments made, together with a future built by
 *         {@code toWhenHasSplitQueueSpaceFuture} from the unassigned work
 * @throws PrestoException with {@code NO_NODES_AVAILABLE} if a hard-affinity split has no candidate nodes
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    NodeMap nodeMap = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    // Per-depth count of splits assigned in this call; flushed to topologicalSplitCounters at the end
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    // Locations for which bestNodeSplitCount already returned null during this call; skipped for later splits
    Set<NetworkLocation> filledLocations = new HashSet<>();
    // Candidate nodes of hard-affinity splits that could not be assigned
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    // Set once any non-hard-affinity split could not be assigned at any depth
    boolean splitWaitingForAnyNode = false;
    NodeProvider nodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
    for (Split split : splits) {
        SplitWeight splitWeight = split.getSplitWeight();
        if (split.getNodeSelectionStrategy() == HARD_AFFINITY) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            // A null result means no candidate was picked
            // NOTE(review): presumably because every candidate is at its pending-split limit — confirm in bestNodeSplitCount
            InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            } else // Exact node set won't matter, if a split is waiting for any node
            if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            // Hard-affinity splits never go through the topology search below
            continue;
        }
        InternalNode chosenNode = null;
        int depth = networkLocationSegmentNames.size();
        int chosenDepth = 0;
        // Network locations of the split's preferred hosts
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getPreferredNodes(nodeProvider)) {
            locations.add(networkLocationCache.get(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            // With only the root location there is a single depth (0) to try
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // For example, locations which couldn't be located will be at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                // Truncate the location to the first i segments for this depth
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMap.getActiveWorkersByNetworkPath().get(location);
                // The pending-split limit passed here depends on the current depth i relative to the starting depth
                chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                // No node was chosen at this location; remember it so later splits skip it
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            topologicCounters[chosenDepth]++;
        } else {
            splitWaitingForAnyNode = true;
        }
    }
    // Publish the per-depth assignment counts, skipping depths with no assignments
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<?> blocked;
    long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, networkLocationSegmentNames.size());
    if (splitWaitingForAnyNode) {
        // Some split could go to any node: build the future from the existing tasks only
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    } else {
        // Only hard-affinity splits (if any) are unassigned: build the future from their candidate nodes
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : NodeAssignmentStats(com.facebook.presto.execution.scheduler.NodeAssignmentStats) PrestoException(com.facebook.presto.spi.PrestoException) NodeProvider(com.facebook.presto.spi.NodeProvider) HostAddress(com.facebook.presto.spi.HostAddress) NetworkLocation(com.facebook.presto.execution.scheduler.NetworkLocation) SplitWeight(com.facebook.presto.spi.SplitWeight) ResettableRandomizedIterator(com.facebook.presto.execution.scheduler.ResettableRandomizedIterator) NodeMap(com.facebook.presto.execution.scheduler.NodeMap) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) InternalNode(com.facebook.presto.metadata.InternalNode) Split(com.facebook.presto.metadata.Split) SplitPlacementResult(com.facebook.presto.execution.scheduler.SplitPlacementResult) HashSet(java.util.HashSet)

Aggregations

BucketNodeMap (com.facebook.presto.execution.scheduler.BucketNodeMap)3 NodeAssignmentStats (com.facebook.presto.execution.scheduler.NodeAssignmentStats)3 NodeMap (com.facebook.presto.execution.scheduler.NodeMap)3 ResettableRandomizedIterator (com.facebook.presto.execution.scheduler.ResettableRandomizedIterator)3 SplitPlacementResult (com.facebook.presto.execution.scheduler.SplitPlacementResult)3 InternalNode (com.facebook.presto.metadata.InternalNode)3 Split (com.facebook.presto.metadata.Split)3 PrestoException (com.facebook.presto.spi.PrestoException)3 SplitWeight (com.facebook.presto.spi.SplitWeight)3 Logger (com.facebook.airlift.log.Logger)2 Session (com.facebook.presto.Session)2 NodeTaskMap (com.facebook.presto.execution.NodeTaskMap)2 QueryManager (com.facebook.presto.execution.QueryManager)2 RemoteTask (com.facebook.presto.execution.RemoteTask)2 InternalNodeInfo (com.facebook.presto.execution.scheduler.InternalNodeInfo)2 NodeScheduler.calculateLowWatermark (com.facebook.presto.execution.scheduler.NodeScheduler.calculateLowWatermark)2 NodeScheduler.selectNodes (com.facebook.presto.execution.scheduler.NodeScheduler.selectNodes)2 NodeScheduler.toWhenHasSplitQueueSpaceFuture (com.facebook.presto.execution.scheduler.NodeScheduler.toWhenHasSplitQueueSpaceFuture)2 SplitContext (com.facebook.presto.spi.SplitContext)2 NODE_SELECTION_NOT_SUPPORTED (com.facebook.presto.spi.StandardErrorCode.NODE_SELECTION_NOT_SUPPORTED)2