Use of io.trino.spi.StandardErrorCode.NO_NODES_AVAILABLE in project trino by trinodb.
From the class FullNodeCapableNodeAllocatorService, method processFullNodePendingAcquires:
private void processFullNodePendingAcquires()
{
    Map<PendingAcquire, InternalNode> assignedNodes = new IdentityHashMap<>();
    Map<PendingAcquire, RuntimeException> failures = new IdentityHashMap<>();
    synchronized (this) {
        Iterator<PendingAcquire> detachedIterator = detachedFullNodePendingAcquires.iterator();
        while (detachedIterator.hasNext()) {
            PendingAcquire pendingAcquire = detachedIterator.next();
            try {
                if (pendingAcquire.getFuture().isCancelled()) {
                    // discard cancelled detached pendingAcquire
                    detachedIterator.remove();
                    continue;
                }
                Candidates currentCandidates = selectCandidates(pendingAcquire.getNodeRequirements());
                if (currentCandidates.isEmpty()) {
                    throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
                }
                Optional<InternalNode> target = findTargetPendingFullNode(pendingAcquire.getQueryId(), currentCandidates);
                if (target.isEmpty()) {
                    // leave pendingAcquire as pending
                    continue;
                }
                // move pendingAcquire to fullNodePendingAcquires
                fullNodePendingAcquires.put(target.get(), pendingAcquire);
                fullNodesByQueryId.put(pendingAcquire.getQueryId(), target.get());
                detachedIterator.remove();
            }
            catch (RuntimeException e) {
                failures.put(pendingAcquire, e);
                detachedIterator.remove();
            }
        }

        Set<InternalNode> nodes = ImmutableSet.copyOf(fullNodePendingAcquires.keySet());
        for (InternalNode reservedNode : nodes) {
            PendingAcquire pendingAcquire = fullNodePendingAcquires.get(reservedNode);
            if (pendingAcquire.getFuture().isCancelled()) {
                // discard cancelled pendingAcquire with target node
                fullNodePendingAcquires.remove(reservedNode);
                verify(fullNodesByQueryId.remove(pendingAcquire.getQueryId(), reservedNode));
                continue;
            }
            try {
                Candidates currentCandidates = selectCandidates(pendingAcquire.getNodeRequirements());
                if (currentCandidates.isEmpty()) {
                    throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
                }
                if (sharedAllocatedMemory.getOrDefault(reservedNode, 0L) > 0 || allocatedFullNodes.contains(reservedNode)) {
                    // reserved node is still in use; opportunistically check whether another empty node that nobody is waiting for is available
                    Optional<InternalNode> opportunisticNode = currentCandidates.getCandidates().stream()
                            .filter(node -> !fullNodePendingAcquires.containsKey(node))
                            .filter(node -> !allocatedFullNodes.contains(node))
                            .filter(node -> sharedAllocatedMemory.getOrDefault(node, 0L) == 0)
                            .findFirst();
                    if (opportunisticNode.isPresent()) {
                        fullNodePendingAcquires.remove(reservedNode);
                        verify(fullNodesByQueryId.remove(pendingAcquire.getQueryId(), reservedNode));
                        allocatedFullNodes.add(opportunisticNode.get());
                        verify(fullNodesByQueryId.put(pendingAcquire.getQueryId(), opportunisticNode.get()));
                        assignedNodes.put(pendingAcquire, opportunisticNode.get());
                    }
                    continue;
                }
                if (!currentCandidates.getCandidates().contains(reservedNode)) {
                    // current candidate is gone; move pendingAcquire to detached state
                    detachedFullNodePendingAcquires.add(pendingAcquire);
                    fullNodePendingAcquires.remove(reservedNode);
                    verify(fullNodesByQueryId.remove(pendingAcquire.getQueryId(), reservedNode));
                    // trigger one more round of processing immediately
                    wakeupProcessPendingAcquires();
                    continue;
                }
                // we are good to acquire the reserved full node
                allocatedFullNodes.add(reservedNode);
                fullNodePendingAcquires.remove(reservedNode);
                assignedNodes.put(pendingAcquire, reservedNode);
            }
            catch (RuntimeException e) {
                failures.put(pendingAcquire, e);
                fullNodePendingAcquires.remove(reservedNode);
                fullNodesByQueryId.remove(pendingAcquire.getQueryId(), reservedNode);
            }
        }
    }

    // complete futures outside of synchronized section
    checkState(!Thread.holdsLock(this), "Cannot complete node futures under lock");
    assignedNodes.forEach((pendingAcquire, node) -> {
        SettableFuture<InternalNode> future = pendingAcquire.getFuture();
        future.set(node);
        if (future.isCancelled()) {
            releaseFullNode(node, pendingAcquire.getQueryId());
        }
    });
    failures.forEach((pendingAcquire, failure) -> {
        SettableFuture<InternalNode> future = pendingAcquire.getFuture();
        future.setException(failure);
    });
}
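
The empty-candidates guard above (throwing TrinoException with NO_NODES_AVAILABLE) is the fail-fast pattern this error code exists for. Below is a minimal sketch of that guard pulled into a standalone helper, assuming only trino-spi on the classpath; the NodeCandidateChecks class and requireNonEmpty method are hypothetical names for illustration, not Trino code.

import static io.trino.spi.StandardErrorCode.NO_NODES_AVAILABLE;

import io.trino.spi.TrinoException;

import java.util.Collection;

// Hypothetical helper (not part of Trino): fail fast when no candidate nodes exist,
// mirroring the empty-candidates checks in processFullNodePendingAcquires.
final class NodeCandidateChecks
{
    private NodeCandidateChecks() {}

    static <C extends Collection<?>> C requireNonEmpty(C candidates)
    {
        if (candidates.isEmpty()) {
            throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }
        return candidates;
    }
}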
From the class UniformNodeSelector, method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks)
{
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMap, includeCoordinator, ImmutableSet.of());
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();

    // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
    if (optimizedLocalScheduling) {
        for (Split split : splits) {
            if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
                Optional<InternalNode> chosenNode = candidateNodes.stream()
                        .filter(ownerNode -> assignmentStats.getTotalSplitsWeight(ownerNode) < maxSplitsWeightPerNode
                                && assignmentStats.getUnacknowledgedSplitCountForStage(ownerNode) < maxUnacknowledgedSplitsPerTask)
                        .min(comparingLong(assignmentStats::getTotalSplitsWeight));
                if (chosenNode.isPresent()) {
                    assignment.put(chosenNode.get(), split);
                    assignmentStats.addAssignedSplit(chosenNode.get(), split.getSplitWeight());
                    splitsToBeRedistributed = true;
                    continue;
                }
            }
            remainingSplits.add(split);
        }
    }
    else {
        remainingSplits = splits;
    }

    for (Split split : remainingSplits) {
        randomCandidates.reset();
        List<InternalNode> candidateNodes;
        if (!split.isRemotelyAccessible()) {
            candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
        }
        else {
            candidateNodes = selectNodes(minCandidates, randomCandidates);
        }
        if (candidateNodes.isEmpty()) {
            log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
            throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }

        InternalNode chosenNode = chooseNodeForSplit(assignmentStats, candidateNodes);
        if (chosenNode == null) {
            long minWeight = Long.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                long queuedWeight = assignmentStats.getQueuedSplitsWeightForStage(node);
                if (queuedWeight <= minWeight && queuedWeight < maxPendingSplitsWeightPerTask
                        && assignmentStats.getUnacknowledgedSplitCountForStage(node) < maxUnacknowledgedSplitsPerTask) {
                    chosenNode = node;
                    minWeight = queuedWeight;
                }
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, split.getSplitWeight());
        }
        else {
            if (split.isRemotelyAccessible()) {
                splitWaitingForAnyNode = true;
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }

    ListenableFuture<Void> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }

    if (splitsToBeRedistributed) {
        equateDistribution(assignment, assignmentStats, nodeMap, includeCoordinator);
    }
    return new SplitPlacementResult(blocked, assignment);
}
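
In both usages the exception carries the standard NO_NODES_AVAILABLE error code, so callers can tell this failure apart from other scheduling errors by comparing error codes rather than message text. A hedged sketch of that caller-side pattern follows; the NoNodesAvailableHandling class and runScheduling method are hypothetical stand-ins, not Trino APIs.

import static io.trino.spi.StandardErrorCode.NO_NODES_AVAILABLE;

import io.trino.spi.TrinoException;

class NoNodesAvailableHandling
{
    // Hypothetical caller-side check (not Trino code): recognize the no-nodes failure
    // mode by its error code instead of matching on the exception message.
    static void runScheduling(Runnable scheduling)
    {
        try {
            scheduling.run();
        }
        catch (TrinoException e) {
            if (NO_NODES_AVAILABLE.toErrorCode().equals(e.getErrorCode())) {
                // No eligible worker nodes were active when node or split placement was attempted.
                throw new IllegalStateException("No worker nodes are available to run the query", e);
            }
            throw e;
        }
    }
}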