Search in sources :

Example 1 with SplitWeight

use of io.trino.spi.SplitWeight in project trino by trinodb.

the class TopologyAwareNodeSelector method computeAssignments.

@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    NodeMap nodeMap = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    Set<NetworkLocation> filledLocations = new HashSet<>();
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    for (Split split : splits) {
        SplitWeight splitWeight = split.getSplitWeight();
        if (!split.isRemotelyAccessible()) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
                throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            } else // Exact node set won't matter, if a split is waiting for any node
            if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            continue;
        }
        InternalNode chosenNode = null;
        int depth = topologicalSplitCounters.size() - 1;
        int chosenDepth = 0;
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getAddresses()) {
            locations.add(networkTopology.locate(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // For example, locations which couldn't be located will be at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMap.getWorkersByNetworkPath().get(location);
                chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            topologicCounters[chosenDepth]++;
        } else {
            splitWaitingForAnyNode = true;
        }
    }
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<Void> blocked;
    long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, topologicalSplitCounters.size() - 1);
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : HostAddress(io.trino.spi.HostAddress) SplitWeight(io.trino.spi.SplitWeight) TrinoException(io.trino.spi.TrinoException) InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) HashSet(java.util.HashSet)

Example 2 with SplitWeight

use of io.trino.spi.SplitWeight in project trino by trinodb.

the class NodeScheduler method selectDistributionNodes.

public static SplitPlacementResult selectDistributionNodes(NodeMap nodeMap, NodeTaskMap nodeTaskMap, long maxSplitsWeightPerNode, long maxPendingSplitsWeightPerTask, int maxUnacknowledgedSplitsPerTask, Set<Split> splits, List<RemoteTask> existingTasks, BucketNodeMap bucketNodeMap) {
    Multimap<InternalNode, Split> assignments = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    Set<InternalNode> blockedNodes = new HashSet<>();
    for (Split split : splits) {
        // node placement is forced by the bucket to node map
        InternalNode node = bucketNodeMap.getAssignedNode(split).get();
        SplitWeight splitWeight = split.getSplitWeight();
        // if node is full, don't schedule now, which will push back on the scheduling of splits
        if (canAssignSplitToDistributionNode(assignmentStats, node, maxSplitsWeightPerNode, maxPendingSplitsWeightPerTask, maxUnacknowledgedSplitsPerTask, splitWeight)) {
            assignments.put(node, split);
            assignmentStats.addAssignedSplit(node, splitWeight);
        } else {
            blockedNodes.add(node);
        }
    }
    ListenableFuture<Void> blocked = toWhenHasSplitQueueSpaceFuture(blockedNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    return new SplitPlacementResult(blocked, ImmutableMultimap.copyOf(assignments));
}
Also used : SplitWeight(io.trino.spi.SplitWeight) InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

InternalNode (io.trino.metadata.InternalNode)2 Split (io.trino.metadata.Split)2 SplitWeight (io.trino.spi.SplitWeight)2 HashSet (java.util.HashSet)2 HostAddress (io.trino.spi.HostAddress)1 TrinoException (io.trino.spi.TrinoException)1 LinkedHashSet (java.util.LinkedHashSet)1