use of io.trino.spi.SplitWeight in project trino by trinodb.
the class TopologyAwareNodeSelector method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
NodeMap nodeMap = this.nodeMap.get().get();
Multimap<InternalNode, Split> assignment = HashMultimap.create();
NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
int[] topologicCounters = new int[topologicalSplitCounters.size()];
Set<NetworkLocation> filledLocations = new HashSet<>();
Set<InternalNode> blockedExactNodes = new HashSet<>();
boolean splitWaitingForAnyNode = false;
for (Split split : splits) {
SplitWeight splitWeight = split.getSplitWeight();
if (!split.isRemotelyAccessible()) {
List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
if (candidateNodes.isEmpty()) {
log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
}
InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode, splitWeight);
} else // Exact node set won't matter, if a split is waiting for any node
if (!splitWaitingForAnyNode) {
blockedExactNodes.addAll(candidateNodes);
}
continue;
}
InternalNode chosenNode = null;
int depth = topologicalSplitCounters.size() - 1;
int chosenDepth = 0;
Set<NetworkLocation> locations = new HashSet<>();
for (HostAddress host : split.getAddresses()) {
locations.add(networkTopology.locate(host));
}
if (locations.isEmpty()) {
// Add the root location
locations.add(ROOT_LOCATION);
depth = 0;
}
// Try each address at progressively shallower network locations
for (int i = depth; i >= 0 && chosenNode == null; i--) {
for (NetworkLocation location : locations) {
// For example, locations which couldn't be located will be at the "root" location
if (location.getSegments().size() < i) {
continue;
}
location = location.subLocation(0, i);
if (filledLocations.contains(location)) {
continue;
}
Set<InternalNode> nodes = nodeMap.getWorkersByNetworkPath().get(location);
chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
if (chosenNode != null) {
chosenDepth = i;
break;
}
filledLocations.add(location);
}
}
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode, splitWeight);
topologicCounters[chosenDepth]++;
} else {
splitWaitingForAnyNode = true;
}
}
for (int i = 0; i < topologicCounters.length; i++) {
if (topologicCounters[i] > 0) {
topologicalSplitCounters.get(i).update(topologicCounters[i]);
}
}
ListenableFuture<Void> blocked;
long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, topologicalSplitCounters.size() - 1);
if (splitWaitingForAnyNode) {
blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
} else {
blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
}
return new SplitPlacementResult(blocked, assignment);
}
use of io.trino.spi.SplitWeight in project trino by trinodb.
the class NodeScheduler method selectDistributionNodes.
public static SplitPlacementResult selectDistributionNodes(NodeMap nodeMap, NodeTaskMap nodeTaskMap, long maxSplitsWeightPerNode, long maxPendingSplitsWeightPerTask, int maxUnacknowledgedSplitsPerTask, Set<Split> splits, List<RemoteTask> existingTasks, BucketNodeMap bucketNodeMap) {
Multimap<InternalNode, Split> assignments = HashMultimap.create();
NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
Set<InternalNode> blockedNodes = new HashSet<>();
for (Split split : splits) {
// node placement is forced by the bucket to node map
InternalNode node = bucketNodeMap.getAssignedNode(split).get();
SplitWeight splitWeight = split.getSplitWeight();
// if node is full, don't schedule now, which will push back on the scheduling of splits
if (canAssignSplitToDistributionNode(assignmentStats, node, maxSplitsWeightPerNode, maxPendingSplitsWeightPerTask, maxUnacknowledgedSplitsPerTask, splitWeight)) {
assignments.put(node, split);
assignmentStats.addAssignedSplit(node, splitWeight);
} else {
blockedNodes.add(node);
}
}
ListenableFuture<Void> blocked = toWhenHasSplitQueueSpaceFuture(blockedNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
return new SplitPlacementResult(blocked, ImmutableMultimap.copyOf(assignments));
}
Aggregations