Use of com.facebook.presto.metadata.InternalNode in the presto project by prestodb.
Class TopologyAwareNodeSelector, method bestNodeSplitCount.
/**
 * Picks a node for a split from the given candidates.
 *
 * <p>The first candidate that is below {@code maxUnacknowledgedSplitsPerTask} and passes
 * {@code canAssignSplitBasedOnWeight} against {@code maxSplitsWeightPerNode} is returned
 * immediately. Candidates that fail either test are counted; among those that only failed the
 * node-weight test, the one with the smallest queued weight for the stage that still passes
 * {@code canAssignSplitBasedOnWeight} against {@code maxPendingSplitsWeightPerTask} is
 * remembered as the fallback.
 *
 * <p>Iteration stops once the candidates are exhausted, or once at least
 * {@code minCandidatesWhenFull} rejected candidates have been counted and a fallback exists.
 *
 * @param splitWeight weight of the split being placed
 * @param candidates nodes to examine, in iteration order
 * @param minCandidatesWhenFull number of rejected candidates to examine before settling for the fallback
 * @param maxPendingSplitsWeightPerTask queued-weight limit used when selecting the fallback
 * @param assignmentStats source of the per-node split counts and weights
 * @return the chosen node, or {@code null} when no candidate qualified
 */
@Nullable
private InternalNode bestNodeSplitCount(SplitWeight splitWeight, Iterator<InternalNode> candidates, int minCandidatesWhenFull, long maxPendingSplitsWeightPerTask, NodeAssignmentStats assignmentStats) {
    InternalNode fallbackNode = null;
    long fallbackQueuedWeight = Long.MAX_VALUE;
    int rejectedCandidates = 0;

    while (candidates.hasNext()) {
        // Enough rejected candidates were inspected and a fallback is already known.
        if (rejectedCandidates >= minCandidatesWhenFull && fallbackNode != null) {
            break;
        }

        InternalNode candidate = candidates.next();
        boolean atUnacknowledgedLimit = assignmentStats.getUnacknowledgedSplitCountForStage(candidate) >= maxUnacknowledgedSplitsPerTask;

        if (!atUnacknowledgedLimit
                && canAssignSplitBasedOnWeight(assignmentStats.getTotalSplitsWeight(candidate), maxSplitsWeightPerNode, splitWeight)) {
            return candidate;
        }

        rejectedCandidates++;
        if (atUnacknowledgedLimit) {
            // Never eligible as a fallback while it is at the unacknowledged-split limit.
            continue;
        }

        long queuedWeight = assignmentStats.getQueuedSplitsWeightForStage(candidate);
        boolean lighterThanFallback = queuedWeight < fallbackQueuedWeight;
        if (lighterThanFallback && canAssignSplitBasedOnWeight(queuedWeight, maxPendingSplitsWeightPerTask, splitWeight)) {
            fallbackQueuedWeight = queuedWeight;
            fallbackNode = candidate;
        }
    }

    return fallbackNode;
}
Use of com.facebook.presto.metadata.InternalNode in the presto project by prestodb.
Class TopologyAwareNodeSelector, method computeAssignments.
/**
 * Assigns each split in {@code splits} to a node and builds the future to wait on when
 * some splits could not be placed.
 *
 * <p>Splits whose node selection strategy is {@code HARD_AFFINITY} are only offered to the
 * nodes returned by {@code selectExactNodes} for their preferred hosts. Every other split is
 * offered to the nodes at the network locations of its preferred hosts, starting at the
 * deepest location and moving to progressively shallower ones until
 * {@code bestNodeSplitCount} returns a node.
 *
 * @param splits the splits to place
 * @param existingTasks tasks handed to {@code NodeAssignmentStats} and to {@code toWhenHasSplitQueueSpaceFuture}
 * @return a {@code SplitPlacementResult} holding the blocked future and the node-to-split
 *         assignment; splits for which no node was chosen are absent from the assignment
 * @throws PrestoException with {@code NO_NODES_AVAILABLE} when a hard-affinity split has no candidate nodes
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
NodeMap nodeMap = this.nodeMap.get().get();
Multimap<InternalNode, Split> assignment = HashMultimap.create();
NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
// Per-depth count of splits placed during this call; flushed to topologicalSplitCounters after the loop.
int[] topologicCounters = new int[topologicalSplitCounters.size()];
// Locations for which bestNodeSplitCount already returned null in this call; later splits skip them.
Set<NetworkLocation> filledLocations = new HashSet<>();
// Candidate nodes of hard-affinity splits that could not be placed.
Set<InternalNode> blockedExactNodes = new HashSet<>();
// Set once a non-hard-affinity split could not be placed at any location.
boolean splitWaitingForAnyNode = false;
NodeProvider nodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
for (Split split : splits) {
SplitWeight splitWeight = split.getSplitWeight();
if (split.getNodeSelectionStrategy() == HARD_AFFINITY) {
List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
if (candidateNodes.isEmpty()) {
log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
}
InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode, splitWeight);
} else // Exact node set won't matter, if a split is waiting for any node
if (!splitWaitingForAnyNode) {
blockedExactNodes.addAll(candidateNodes);
}
// Hard-affinity splits never go through the topology search below.
continue;
}
InternalNode chosenNode = null;
// Start the search at the deepest configured network-location level.
int depth = networkLocationSegmentNames.size();
int chosenDepth = 0;
Set<NetworkLocation> locations = new HashSet<>();
for (HostAddress host : split.getPreferredNodes(nodeProvider)) {
locations.add(networkLocationCache.get(host));
}
if (locations.isEmpty()) {
// Add the root location
locations.add(ROOT_LOCATION);
depth = 0;
}
// Try each address at progressively shallower network locations
for (int i = depth; i >= 0 && chosenNode == null; i--) {
for (NetworkLocation location : locations) {
// For example, locations which couldn't be located will be at the "root" location
if (location.getSegments().size() < i) {
continue;
}
// Truncate the location to the level currently being tried.
location = location.subLocation(0, i);
if (filledLocations.contains(location)) {
continue;
}
Set<InternalNode> nodes = nodeMap.getActiveWorkersByNetworkPath().get(location);
chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
if (chosenNode != null) {
chosenDepth = i;
break;
}
// No node at this location accepted the split; remember that so later splits skip it.
filledLocations.add(location);
}
}
if (chosenNode != null) {
assignment.put(chosenNode, split);
assignmentStats.addAssignedSplit(chosenNode, splitWeight);
topologicCounters[chosenDepth]++;
} else {
splitWaitingForAnyNode = true;
}
}
// Publish the non-zero per-depth placement counts gathered above.
for (int i = 0; i < topologicCounters.length; i++) {
if (topologicCounters[i] > 0) {
topologicalSplitCounters.get(i).update(topologicCounters[i]);
}
}
ListenableFuture<?> blocked;
long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, networkLocationSegmentNames.size());
// When a non-hard-affinity split is still unplaced, use the overload without a node set
// (presumably it watches every existing task -- confirm against that overload);
// otherwise only watch the nodes that blocked hard-affinity splits.
if (splitWaitingForAnyNode) {
blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
} else {
blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
}
return new SplitPlacementResult(blocked, assignment);
}
Use of com.facebook.presto.metadata.InternalNode in the presto project by prestodb.
Class NodeScheduler, method toWhenHasSplitQueueSpaceFuture.
/**
 * Builds a future from the split-queue-space futures of the tasks running on the given
 * blocked nodes.
 *
 * <p>Tasks are matched to nodes by comparing {@code RemoteTask.getNodeId()} with
 * {@code InternalNode.getNodeIdentifier()}; blocked nodes without a matching task are ignored.
 * When several tasks report the same node id, the last one in {@code existingTasks} is used.
 *
 * @param blockedNodes nodes whose tasks should be watched
 * @param existingTasks tasks to look the blocked nodes up in
 * @param weightSpaceThreshold threshold forwarded to {@code RemoteTask.whenSplitQueueHasSpace}
 * @return an immediate future when there are no blocked nodes or none of them has a task;
 *         otherwise the result of {@code whenAnyCompleteCancelOthers} over the per-task futures
 */
public static ListenableFuture<?> toWhenHasSplitQueueSpaceFuture(Set<InternalNode> blockedNodes, List<RemoteTask> existingTasks, long weightSpaceThreshold) {
    if (blockedNodes.isEmpty()) {
        return immediateFuture(null);
    }

    // Index the tasks by the id of the node they run on.
    Map<String, RemoteTask> tasksByNodeId = new HashMap<>();
    existingTasks.forEach(task -> tasksByNodeId.put(task.getNodeId(), task));

    List<ListenableFuture<?>> queueSpaceFutures = blockedNodes.stream()
            .map(node -> tasksByNodeId.get(node.getNodeIdentifier()))
            .filter(task -> task != null)
            .map(task -> task.whenSplitQueueHasSpace(weightSpaceThreshold))
            .collect(toImmutableList());

    if (!queueSpaceFutures.isEmpty()) {
        return whenAnyCompleteCancelOthers(queueSpaceFutures);
    }
    // None of the blocked nodes has a task yet, so there is nothing to wait for.
    return immediateFuture(null);
}
Use of com.facebook.presto.metadata.InternalNode in the presto project by prestodb.
Class SimpleNodeSelector, method chooseLeastBusyNode.
/**
 * Chooses a node for a split from {@code candidateNodes}.
 *
 * <p>Nodes whose status is {@code DEAD}, and nodes at or above
 * {@code maxUnacknowledgedSplitsPerTask}, are skipped. When {@code preferredNodeCount} is
 * present, the first that many list positions are treated as preferred: the first preferred
 * node that passes {@code canAssignSplitBasedOnWeight} is returned right away. Otherwise the
 * assignable node with the smallest weight reported by {@code splitWeightProvider} is chosen.
 * The matching {@code nodeSelectionStats} counter is incremented for each outcome.
 *
 * @param splitWeight weight of the split being placed
 * @param candidateNodes nodes to consider, preferred nodes first
 * @param splitWeightProvider supplies the current weight of a node
 * @param preferredNodeCount number of leading entries of {@code candidateNodes} that are preferred, if any
 * @param maxSplitsWeight weight limit passed to {@code canAssignSplitBasedOnWeight}
 * @param assignmentStats source of the unacknowledged split counts
 * @return the chosen node, flagged {@code true} when it came from the preferred positions,
 *         or empty when no node can take the split
 */
protected Optional<InternalNodeInfo> chooseLeastBusyNode(SplitWeight splitWeight, List<InternalNode> candidateNodes, ToLongFunction<InternalNode> splitWeightProvider, OptionalInt preferredNodeCount, long maxSplitsWeight, NodeAssignmentStats assignmentStats) {
    InternalNode leastBusyNode = null;
    long lowestWeight = Long.MAX_VALUE;

    for (int position = 0; position < candidateNodes.size(); position++) {
        InternalNode candidate = candidateNodes.get(position);
        boolean inPreferredRange = preferredNodeCount.isPresent() && position < preferredNodeCount.getAsInt();

        if (candidate.getNodeStatus() == DEAD) {
            // A dead node cannot take splits; only record the skip when it was a preferred one.
            if (inPreferredRange) {
                nodeSelectionStats.incrementPreferredNonAliveNodeSkippedCount();
            }
            continue;
        }

        if (assignmentStats.getUnacknowledgedSplitCountForStage(candidate) >= maxUnacknowledgedSplitsPerTask) {
            continue;
        }

        long candidateWeight = splitWeightProvider.applyAsLong(candidate);
        if (!canAssignSplitBasedOnWeight(candidateWeight, maxSplitsWeight, splitWeight)) {
            continue;
        }

        // Preferred nodes win as soon as one of them can take the split.
        if (inPreferredRange) {
            if (position == 0) {
                nodeSelectionStats.incrementPrimaryPreferredNodeSelectedCount();
            }
            else {
                nodeSelectionStats.incrementNonPrimaryPreferredNodeSelectedCount();
            }
            return Optional.of(new InternalNodeInfo(candidate, true));
        }

        // Otherwise keep track of the lightest assignable node seen so far.
        if (candidateWeight < lowestWeight) {
            leastBusyNode = candidate;
            lowestWeight = candidateWeight;
        }
    }

    if (leastBusyNode != null) {
        nodeSelectionStats.incrementNonPreferredNodeSelectedCount();
        return Optional.of(new InternalNodeInfo(leastBusyNode, false));
    }
    return Optional.empty();
}
Use of com.facebook.presto.metadata.InternalNode in the presto project by prestodb.
Class ScaledWriterScheduler, method scheduleTasks.
/**
 * Schedules tasks on up to {@code count} nodes picked by
 * {@code nodeSelector.selectRandomNodes}, passing {@code scheduledNodes} to the selector.
 *
 * <p>Each node for which {@code stage.scheduleTask} yields a task is added to
 * {@code scheduledNodes}.
 *
 * @param count number of nodes to request; {@code 0} returns an empty list without selecting nodes
 * @return the tasks that were actually created, in node selection order
 * @throws RuntimeException presumably a {@code PrestoException} with {@code NO_NODES_AVAILABLE},
 *         raised via {@code checkCondition} when nothing has been scheduled so far and the
 *         selector returned no nodes (exact type depends on {@code checkCondition} -- confirm)
 */
private List<RemoteTask> scheduleTasks(int count) {
    if (count == 0) {
        return ImmutableList.of();
    }

    List<InternalNode> selectedNodes = nodeSelector.selectRandomNodes(count, scheduledNodes);
    boolean nothingAvailable = scheduledNodes.isEmpty() && selectedNodes.isEmpty();
    checkCondition(!nothingAvailable, NO_NODES_AVAILABLE, "No nodes available to run query");

    ImmutableList.Builder<RemoteTask> createdTasks = ImmutableList.builder();
    for (InternalNode selectedNode : selectedNodes) {
        Optional<RemoteTask> scheduled = stage.scheduleTask(selectedNode, scheduledNodes.size());
        if (scheduled.isPresent()) {
            createdTasks.add(scheduled.get());
            scheduledNodes.add(selectedNode);
        }
    }
    return createdTasks.build();
}
Aggregations