Use of com.facebook.presto.spi.SplitWeight in project presto by prestodb.
From the class NodeScheduler, the method selectDistributionNodes:
public static SplitPlacementResult selectDistributionNodes(NodeMap nodeMap, NodeTaskMap nodeTaskMap, long maxSplitsWeightPerNode, long maxPendingSplitsWeightPerTask, int maxUnacknowledgedSplitsPerTask, Set<Split> splits, List<RemoteTask> existingTasks, BucketNodeMap bucketNodeMap, NodeSelectionStats nodeSelectionStats)
{
    Multimap<InternalNode, Split> assignments = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    Set<InternalNode> blockedNodes = new HashSet<>();
    for (Split split : splits) {
        // node placement is forced by the bucket to node map
        InternalNode node = bucketNodeMap.getAssignedNode(split).get();
        boolean isCacheable = bucketNodeMap.isSplitCacheable(split);
        SplitWeight splitWeight = split.getSplitWeight();
        // if node is full, don't schedule now, which will push back on the scheduling of splits
        if (canAssignSplitToDistributionNode(assignmentStats, node, maxSplitsWeightPerNode, maxPendingSplitsWeightPerTask, maxUnacknowledgedSplitsPerTask, splitWeight)) {
            if (isCacheable) {
                split = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(true));
                nodeSelectionStats.incrementBucketedPreferredNodeSelectedCount();
            }
            else {
                nodeSelectionStats.incrementBucketedNonPreferredNodeSelectedCount();
            }
            assignments.put(node, split);
            assignmentStats.addAssignedSplit(node, splitWeight);
        }
        else {
            blockedNodes.add(node);
        }
    }
    ListenableFuture<?> blocked = toWhenHasSplitQueueSpaceFuture(blockedNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    return new SplitPlacementResult(blocked, ImmutableMultimap.copyOf(assignments));
}
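
The admission check above delegates to canAssignSplitToDistributionNode, which is not included in this snippet. Below is a minimal sketch of such a weight-based check, assuming NodeAssignmentStats exposes getTotalSplitsWeight and getQueuedSplitsWeightForStage (both visible in the other snippets on this page); the unacknowledged-split accessor and SplitWeight.getRawValue() are assumptions, and the real helper in NodeScheduler may differ.

// Sketch only, not the actual NodeScheduler helper.
private static boolean canAssignSplitToDistributionNode(NodeAssignmentStats assignmentStats, InternalNode node, long maxSplitsWeightPerNode, long maxPendingSplitsWeightPerTask, int maxUnacknowledgedSplitsPerTask, SplitWeight splitWeight)
{
    long splitWeightValue = splitWeight.getRawValue();   // getRawValue() assumed
    // Refuse outright if the task already has too many splits the worker has not acknowledged
    if (assignmentStats.getUnacknowledgedSplitCountForStage(node) >= maxUnacknowledgedSplitsPerTask) {   // accessor name assumed
        return false;
    }
    // Otherwise admit the split if it fits under the node-wide weight limit,
    // or at least under the per-task pending-weight limit
    return assignmentStats.getTotalSplitsWeight(node) + splitWeightValue <= maxSplitsWeightPerNode
            || assignmentStats.getQueuedSplitsWeightForStage(node) + splitWeightValue <= maxPendingSplitsWeightPerTask;
}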
Use of com.facebook.presto.spi.SplitWeight in project presto by prestodb.
From the class SimpleNodeSelector, the method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks)
{
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    List<InternalNode> eligibleNodes = getEligibleNodes(maxTasksPerStage, nodeMap, existingTasks);
    NodeSelection randomNodeSelection = new RandomNodeSelection(eligibleNodes, minCandidates);
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    NodeProvider nodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
    OptionalInt preferredNodeCount = OptionalInt.empty();
    for (Split split : splits) {
        List<InternalNode> candidateNodes;
        switch (split.getNodeSelectionStrategy()) {
            case HARD_AFFINITY:
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                break;
            case SOFT_AFFINITY:
                // Using all nodes for soft affinity scheduling with modular hashing because otherwise temporarily down nodes would trigger too much rehashing
                if (nodeSelectionHashStrategy == MODULAR_HASHING) {
                    nodeProvider = new ModularHashingNodeProvider(nodeMap.getAllNodes());
                }
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                candidateNodes = ImmutableList.<InternalNode>builder().addAll(candidateNodes).addAll(randomNodeSelection.pickNodes(split)).build();
                break;
            case NO_PREFERENCE:
                candidateNodes = randomNodeSelection.pickNodes(split);
                break;
            default:
                throw new PrestoException(NODE_SELECTION_NOT_SUPPORTED, format("Unsupported node selection strategy %s", split.getNodeSelectionStrategy()));
        }
        if (candidateNodes.isEmpty()) {
            log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
            throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }
        SplitWeight splitWeight = split.getSplitWeight();
        Optional<InternalNodeInfo> chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getTotalSplitsWeight, preferredNodeCount, maxSplitsWeightPerNode, assignmentStats);
        if (!chosenNodeInfo.isPresent()) {
            chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getQueuedSplitsWeightForStage, preferredNodeCount, maxPendingSplitsWeightPerTask, assignmentStats);
        }
        if (chosenNodeInfo.isPresent()) {
            split = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(chosenNodeInfo.get().isCacheable()));
            InternalNode chosenNode = chosenNodeInfo.get().getInternalNode();
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
        }
        else {
            if (split.getNodeSelectionStrategy() != HARD_AFFINITY) {
                splitWaitingForAnyNode = true;
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }
    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    return new SplitPlacementResult(blocked, assignment);
}
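
The two chooseLeastBusyNode calls differ only in the weight accessor they pass: the first compares candidates by total assigned weight against maxSplitsWeightPerNode, the second by queued weight for the stage against maxPendingSplitsWeightPerTask. Below is a rough, non-authoritative sketch of such a selection loop; the InternalNodeInfo constructor shape, SplitWeight.getRawValue(), and the use of preferredNodeCount to derive cacheability are assumptions, and the real method in SimpleNodeSelector may track more state.

// Rough sketch, not the actual SimpleNodeSelector implementation.
private Optional<InternalNodeInfo> chooseLeastBusyNode(SplitWeight splitWeight, List<InternalNode> candidateNodes, ToLongFunction<InternalNode> splitWeightProvider, OptionalInt preferredNodeCount, long maxSplitsWeight, NodeAssignmentStats assignmentStats)
{
    // assignmentStats is unused in this sketch; the real method may use it for extra bookkeeping.
    InternalNode chosenNode = null;
    int chosenIndex = -1;
    long minWeight = Long.MAX_VALUE;
    for (int i = 0; i < candidateNodes.size(); i++) {
        InternalNode node = candidateNodes.get(i);
        long currentWeight = splitWeightProvider.applyAsLong(node);
        // Reject candidates that would exceed the weight limit if this split were added
        if (currentWeight + splitWeight.getRawValue() > maxSplitsWeight) {   // getRawValue() assumed
            continue;
        }
        // Keep the least loaded candidate seen so far
        if (currentWeight < minWeight) {
            chosenNode = node;
            chosenIndex = i;
            minWeight = currentWeight;
        }
    }
    if (chosenNode == null) {
        return Optional.empty();
    }
    // Assumption: preferred candidates come first in the list, so a chosen index below
    // preferredNodeCount indicates a cache-friendly (preferred) node
    boolean isCacheable = preferredNodeCount.isPresent() && chosenIndex < preferredNodeCount.getAsInt();
    return Optional.of(new InternalNodeInfo(chosenNode, isCacheable));   // constructor shape assumed
}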
Use of com.facebook.presto.spi.SplitWeight in project presto by prestodb.
From the class TestNodeScheduler, the method testMoreSplitsAssignedWhenSplitsWeightsAreSmall:
@Test
public void testMoreSplitsAssignedWhenSplitsWeightsAreSmall()
{
    int standardSplitsPerNode = nodeSchedulerConfig.getMaxSplitsPerNode();
    int standardPendingSplitsPerTask = nodeSchedulerConfig.getMaxPendingSplitsPerTask();
    int fullyLoadedStandardSplitCount = standardSplitsPerNode + standardPendingSplitsPerTask;
    long weightLimitPerNode = SplitWeight.rawValueForStandardSplitCount(standardSplitsPerNode);
    long weightLimitPendingPerTask = SplitWeight.rawValueForStandardSplitCount(standardPendingSplitsPerTask);
    long fullyLoadedStandardSplitWeight = weightLimitPerNode + weightLimitPendingPerTask;
    // Single worker node
    nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 1);
    InternalNode workerNode = nodeSelector.selectRandomNodes(1).get(0);
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    TaskId taskId = new TaskId("test", 1, 0, 1);
    MockRemoteTaskFactory.MockRemoteTask task = remoteTaskFactory.createTableScanTask(taskId, workerNode, ImmutableList.of(), nodeTaskMap.createTaskStatsTracker(workerNode, taskId));
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    ImmutableSet.Builder<Split> splitsBuilder = ImmutableSet.builderWithExpectedSize(fullyLoadedStandardSplitCount * 2);
    // Create 2x more splits than the standard split count limit, at 1/2 the standard weight
    SplitWeight halfWeight = SplitWeight.fromProportion(0.5);
    for (int i = 0; i < fullyLoadedStandardSplitCount * 2; i++) {
        splitsBuilder.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(halfWeight)));
    }
    Set<Split> splits = splitsBuilder.build();
    // Verify we arrived at the exact weight limit
    assertEquals(SplitWeight.rawValueSum(splits, Split::getSplitWeight), fullyLoadedStandardSplitWeight);
    // Node assignment limit met
    SplitPlacementResult result = nodeSelector.computeAssignments(splits, ImmutableList.of(task));
    assertEquals(result.getAssignments().get(workerNode).size(), standardSplitsPerNode * 2);
    assertEquals(SplitWeight.rawValueSum(result.getAssignments().get(workerNode), Split::getSplitWeight), weightLimitPerNode);
    // Mark all splits as running
    task.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), result.getAssignments().get(workerNode)).build());
    task.startSplits(result.getAssignments().get(workerNode).size());
    // Per task pending splits limit met
    Set<Split> remainingSplits = Sets.difference(splits, ImmutableSet.copyOf(result.getAssignments().get(workerNode)));
    SplitPlacementResult secondResults = nodeSelector.computeAssignments(remainingSplits, ImmutableList.of(task));
    assertEquals(secondResults.getAssignments().get(workerNode).size(), standardPendingSplitsPerTask * 2);
    assertEquals(SplitWeight.rawValueSum(secondResults.getAssignments().get(workerNode), Split::getSplitWeight), weightLimitPendingPerTask);
    task.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), secondResults.getAssignments().get(workerNode)).build());
    // 2x fully loaded standard count, full weight limit reached
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(workerNode), PartitionedSplitsInfo.forSplitCountAndWeightSum(fullyLoadedStandardSplitCount * 2, fullyLoadedStandardSplitWeight));
    // No more splits assigned when full
    SplitPlacementResult resultWhenFull = nodeSelector.computeAssignments(ImmutableSet.of(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote())), ImmutableList.of(task));
    assertTrue(resultWhenFull.getAssignments().isEmpty());
}
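
The assertions above rely on the proportional arithmetic of SplitWeight: a split created with fromProportion(0.5) carries exactly half the raw value of a standard split, so twice as many of them fit under the same raw-value limit (the test confirms this itself with the rawValueSum assertion). A small illustration of that relationship, using a hypothetical config value and assuming getRawValue() exposes the raw long that the limits compare against:

// Illustrative check of the half-weight arithmetic; values and getRawValue() are assumptions.
int standardSplitsPerNode = 10;   // hypothetical example value
long weightLimitPerNode = SplitWeight.rawValueForStandardSplitCount(standardSplitsPerNode);
long halfSplitRawWeight = SplitWeight.fromProportion(0.5).getRawValue();   // getRawValue() assumed
// 2x as many half-weight splits sum to exactly the per-node weight limit,
// which is why the first computeAssignments call above accepts standardSplitsPerNode * 2 splits.
assertEquals(2L * standardSplitsPerNode * halfSplitRawWeight, weightLimitPerNode);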
Use of com.facebook.presto.spi.SplitWeight in project presto by prestodb.
From the class SimpleTtlNodeSelector, the method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks)
{
    boolean isNodeSelectionStrategyNoPreference = splits.stream().allMatch(split -> split.getNodeSelectionStrategy() == NodeSelectionStrategy.NO_PREFERENCE);
    // Current NodeSelectionStrategy support is limited to NO_PREFERENCE
    if (!isNodeSelectionStrategyNoPreference) {
        return simpleNodeSelector.computeAssignments(splits, existingTasks);
    }
    ImmutableMultimap.Builder<InternalNode, Split> assignment = ImmutableMultimap.builder();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    List<InternalNode> eligibleNodes = getEligibleNodes(maxTasksPerStage, nodeMap, existingTasks);
    NodeSelection randomNodeSelection = new RandomNodeSelection(eligibleNodes, minCandidates);
    boolean splitWaitingForAnyNode = false;
    OptionalInt preferredNodeCount = OptionalInt.empty();
    for (Split split : splits) {
        if (split.getNodeSelectionStrategy() != NodeSelectionStrategy.NO_PREFERENCE) {
            throw new PrestoException(NODE_SELECTION_NOT_SUPPORTED, format("Unsupported node selection strategy %s", split.getNodeSelectionStrategy()));
        }
        List<InternalNode> candidateNodes = randomNodeSelection.pickNodes(split);
        if (candidateNodes.isEmpty()) {
            log.warn("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
            throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }
        SplitWeight splitWeight = split.getSplitWeight();
        Optional<InternalNodeInfo> chosenNodeInfo = simpleNodeSelector.chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getTotalSplitsWeight, preferredNodeCount, maxSplitsWeightPerNode, assignmentStats);
        if (!chosenNodeInfo.isPresent()) {
            chosenNodeInfo = simpleNodeSelector.chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getQueuedSplitsWeightForStage, preferredNodeCount, maxPendingSplitsWeightPerTask, assignmentStats);
        }
        if (chosenNodeInfo.isPresent()) {
            split = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(chosenNodeInfo.get().isCacheable()));
            InternalNode chosenNode = chosenNodeInfo.get().getInternalNode();
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
        }
        else {
            splitWaitingForAnyNode = true;
        }
    }
    ListenableFuture<?> blocked = splitWaitingForAnyNode ? toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask)) : immediateFuture(null);
    return new SplitPlacementResult(blocked, assignment.build());
}
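
Every selector on this page funnels its weight accounting through NodeAssignmentStats: addAssignedSplit(node, splitWeight) charges the chosen node, and getTotalSplitsWeight / getQueuedSplitsWeightForStage read the totals back for the limit checks. The following is a simplified, stand-alone sketch of that bookkeeping, not the real NodeAssignmentStats (which also folds in the weights reported by the existing RemoteTasks passed to its constructor); SplitWeight.getRawValue() is assumed.

import java.util.HashMap;
import java.util.Map;

// Toy tracker keyed by a node identifier string; the real class keys by InternalNode and merges task stats.
final class AssignedWeightTracker
{
    private final Map<String, Long> assignedWeightByNode = new HashMap<>();

    void addAssignedSplit(String nodeIdentifier, SplitWeight splitWeight)
    {
        // Accumulate raw weight values so limits can be compared with plain longs
        assignedWeightByNode.merge(nodeIdentifier, splitWeight.getRawValue(), Long::sum);   // getRawValue() assumed
    }

    long getTotalSplitsWeight(String nodeIdentifier)
    {
        return assignedWeightByNode.getOrDefault(nodeIdentifier, 0L);
    }
}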
Use of com.facebook.presto.spi.SplitWeight in project presto by prestodb.
From the class TopologyAwareNodeSelector, the method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks)
{
    NodeMap nodeMap = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    Set<NetworkLocation> filledLocations = new HashSet<>();
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    NodeProvider nodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
    for (Split split : splits) {
        SplitWeight splitWeight = split.getSplitWeight();
        if (split.getNodeSelectionStrategy() == HARD_AFFINITY) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            continue;
        }
        InternalNode chosenNode = null;
        int depth = networkLocationSegmentNames.size();
        int chosenDepth = 0;
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getPreferredNodes(nodeProvider)) {
            locations.add(networkLocationCache.get(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // For example, locations which couldn't be located will be at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMap.getActiveWorkersByNetworkPath().get(location);
                chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            topologicCounters[chosenDepth]++;
        }
        else {
            splitWaitingForAnyNode = true;
        }
    }
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<?> blocked;
    long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, networkLocationSegmentNames.size());
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    return new SplitPlacementResult(blocked, assignment);
}
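
bestNodeSplitCount plays the same role here that chooseLeastBusyNode plays in SimpleNodeSelector: scan a bounded number of randomized candidates and return the least loaded one that still has room for the split's weight, or null when every candidate is saturated. The sketch below only approximates that behavior from the call sites above; it is an assumption, not the actual TopologyAwareNodeSelector implementation, which may apply different counting and tie-breaking rules.

// Rough sketch inferred from the call sites above; not the real implementation.
private InternalNode bestNodeSplitCount(SplitWeight splitWeight, Iterator<InternalNode> candidates, int minCandidatesToInspect, long maxPendingSplitsWeightPerTask, NodeAssignmentStats assignmentStats)
{
    InternalNode bestNode = null;
    long minQueuedWeight = Long.MAX_VALUE;
    int inspected = 0;
    while (candidates.hasNext() && inspected < minCandidatesToInspect) {
        InternalNode node = candidates.next();
        inspected++;
        long queuedWeight = assignmentStats.getQueuedSplitsWeightForStage(node);
        // Skip candidates whose stage queue has no room left for this split's weight
        if (queuedWeight + splitWeight.getRawValue() > maxPendingSplitsWeightPerTask) {   // getRawValue() assumed
            continue;
        }
        // Keep the least loaded candidate seen so far
        if (queuedWeight < minQueuedWeight) {
            bestNode = node;
            minQueuedWeight = queuedWeight;
        }
    }
    // null tells the caller to try a shallower network location or block
    return bestNode;
}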