Use of com.facebook.presto.execution.scheduler.SplitPlacementResult in project presto by prestodb.
In class SimpleNodeSelector, the method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks)
{
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);

    List<InternalNode> eligibleNodes = getEligibleNodes(maxTasksPerStage, nodeMap, existingTasks);
    NodeSelection randomNodeSelection = new RandomNodeSelection(eligibleNodes, minCandidates);
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;

    NodeProvider nodeProvider = nodeMap.getActiveNodeProvider(nodeSelectionHashStrategy);
    OptionalInt preferredNodeCount = OptionalInt.empty();

    for (Split split : splits) {
        List<InternalNode> candidateNodes;
        switch (split.getNodeSelectionStrategy()) {
            case HARD_AFFINITY:
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                break;
            case SOFT_AFFINITY:
                // Use all nodes for soft affinity scheduling with modular hashing, because otherwise
                // temporarily down nodes would trigger too much rehashing
                if (nodeSelectionHashStrategy == MODULAR_HASHING) {
                    nodeProvider = new ModularHashingNodeProvider(nodeMap.getAllNodes());
                }
                candidateNodes = selectExactNodes(nodeMap, split.getPreferredNodes(nodeProvider), includeCoordinator);
                preferredNodeCount = OptionalInt.of(candidateNodes.size());
                candidateNodes = ImmutableList.<InternalNode>builder()
                        .addAll(candidateNodes)
                        .addAll(randomNodeSelection.pickNodes(split))
                        .build();
                break;
            case NO_PREFERENCE:
                candidateNodes = randomNodeSelection.pickNodes(split);
                break;
            default:
                throw new PrestoException(NODE_SELECTION_NOT_SUPPORTED, format("Unsupported node selection strategy %s", split.getNodeSelectionStrategy()));
        }

        if (candidateNodes.isEmpty()) {
            log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getActiveNodes());
            throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }

        SplitWeight splitWeight = split.getSplitWeight();
        Optional<InternalNodeInfo> chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getTotalSplitsWeight, preferredNodeCount, maxSplitsWeightPerNode, assignmentStats);
        if (!chosenNodeInfo.isPresent()) {
            chosenNodeInfo = chooseLeastBusyNode(splitWeight, candidateNodes, assignmentStats::getQueuedSplitsWeightForStage, preferredNodeCount, maxPendingSplitsWeightPerTask, assignmentStats);
        }

        if (chosenNodeInfo.isPresent()) {
            split = new Split(split.getConnectorId(), split.getTransactionHandle(), split.getConnectorSplit(), split.getLifespan(), new SplitContext(chosenNodeInfo.get().isCacheable()));
            InternalNode chosenNode = chosenNodeInfo.get().getInternalNode();
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
        }
        else {
            if (split.getNodeSelectionStrategy() != HARD_AFFINITY) {
                splitWaitingForAnyNode = true;
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }

    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    return new SplitPlacementResult(blocked, assignment);
}
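How a caller consumes the result: the assignments multimap tells the scheduler which node each split landed on, and the blocked future completes once split queue space frees up, signalling that another placement round is worth attempting. The sketch below illustrates that pattern under the same imports as the snippet above; scheduleSplitOnNode is a hypothetical helper, and getBlocked() is assumed to expose the blocked future passed to the SplitPlacementResult constructor.

// A minimal sketch of a placement-loop consumer, not code from the Presto scheduler.
// scheduleSplitOnNode(...) is a hypothetical helper used only for illustration.
private void placeSplits(NodeSelector nodeSelector, Set<Split> pendingSplits, List<RemoteTask> tasks)
        throws InterruptedException, ExecutionException
{
    SplitPlacementResult placement = nodeSelector.computeAssignments(pendingSplits, tasks);

    // Dispatch each split to the node chosen for it
    for (Map.Entry<InternalNode, Split> entry : placement.getAssignments().entries()) {
        scheduleSplitOnNode(entry.getKey(), entry.getValue());
    }

    // If some splits were left unplaced, wait for split queue space before retrying
    Set<Split> unplaced = Sets.difference(pendingSplits, ImmutableSet.copyOf(placement.getAssignments().values()));
    if (!unplaced.isEmpty()) {
        placement.getBlocked().get();
    }
}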
Use of com.facebook.presto.execution.scheduler.SplitPlacementResult in project presto by prestodb.
In class TestNodeScheduler, the method testAffinityAssignmentWithModularHashing:
@Test
public void testAffinityAssignmentWithModularHashing()
{
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig()
            .setMaxSplitsPerNode(20)
            .setIncludeCoordinator(false)
            .setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(new LegacyNetworkTopology(), nodeManager, new NodeSelectionStats(), nodeSchedulerConfig, nodeTaskMap, new ThrowingNodeTtlFetcherManager(), new NoOpQueryManager(), new SimpleTtlNodeSelectorConfig());
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 3);
    Set<Split> splits = new HashSet<>();

    // Adding the first split (1 % 3 = 1); 1 split will be distributed to 1 node
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestAffinitySplitRemote(1)));
    SplitPlacementResult splitPlacementResult = nodeSelector.computeAssignments(splits, ImmutableList.of());
    Set<InternalNode> internalNodes = splitPlacementResult.getAssignments().keySet();
    assertEquals(internalNodes.size(), 1);

    // Adding one more split (2 % 3 = 2); 2 splits will be distributed to 2 nodes
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestAffinitySplitRemote(2)));
    splitPlacementResult = nodeSelector.computeAssignments(splits, getRemoteTableScanTask(splitPlacementResult));
    Set<InternalNode> internalNodesSecondCall = splitPlacementResult.getAssignments().keySet();
    assertEquals(internalNodesSecondCall.size(), 2);

    // Adding one more split (4 % 3 = 1) that falls into the same slot; 3 splits are still distributed to 2 nodes
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestAffinitySplitRemote(4)));
    splitPlacementResult = nodeSelector.computeAssignments(splits, getRemoteTableScanTask(splitPlacementResult));
    assertEquals(splitPlacementResult.getAssignments().keySet().size(), 2);

    // Adding one more split (3 % 3 = 0) that falls into a different slot; 4 splits are distributed to 3 nodes
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestAffinitySplitRemote(3)));
    splitPlacementResult = nodeSelector.computeAssignments(splits, getRemoteTableScanTask(splitPlacementResult));
    assertEquals(splitPlacementResult.getAssignments().keySet().size(), 3);
}
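The distribution asserted above comes from modular hashing: a split's affinity identifier modulo the active node count picks the preferred node, so identifiers 1 and 4 share a node on a three-node cluster while identifier 3 maps to a different one. A toy illustration of that selection rule (a simplification for this page, not the ModularHashingNodeProvider implementation):

// Toy modular-hashing selection: splits with equal (identifier % nodes.size())
// get the same preferred node. Illustrative only.
static InternalNode pickByModularHashing(List<InternalNode> nodes, int splitIdentifier)
{
    return nodes.get(Math.floorMod(splitIdentifier, nodes.size()));
}

// With 3 nodes: identifiers 1 and 4 pick the same node (1 % 3 == 4 % 3 == 1),
// while identifier 3 picks a different one (3 % 3 == 0).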
Use of com.facebook.presto.execution.scheduler.SplitPlacementResult in project presto by prestodb.
In class TestNodeScheduler, the method testAffinityAssignmentWithConsistentHashing:
@Test
public void testAffinityAssignmentWithConsistentHashing()
{
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig()
            .setNodeSelectionHashStrategy(CONSISTENT_HASHING)
            .setMinVirtualNodeCount(3)
            .setMaxSplitsPerNode(20)
            .setIncludeCoordinator(false)
            .setMaxPendingSplitsPerTask(10);
    NodeScheduler nodeScheduler = new NodeScheduler(new LegacyNetworkTopology(), nodeManager, new NodeSelectionStats(), nodeSchedulerConfig, nodeTaskMap, new ThrowingNodeTtlFetcherManager(), new NoOpQueryManager(), new SimpleTtlNodeSelectorConfig());
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 3);

    Set<Split> splits = new HashSet<>();
    IntStream.range(0, 10).forEach(i -> splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestAffinitySplitRemote(i))));

    InternalNode node1 = new InternalNode("other1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false);
    InternalNode node2 = new InternalNode("other2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false);
    InternalNode node3 = new InternalNode("other3", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false);
    InternalNode node4 = new InternalNode("other4", URI.create("http://127.0.0.1:14"), NodeVersion.UNKNOWN, false);

    // In setup, nodes 1-3 are added to the node manager
    SplitPlacementResult splitPlacementResult = nodeSelector.computeAssignments(splits, ImmutableList.of());
    assertEquals(splitPlacementResult.getAssignments().keySet().size(), 3);

    // node1: splits 1, 3, 4, 5, 6, 7, 8, 9
    Collection<ConnectorSplit> node1Splits = splitPlacementResult.getAssignments().get(node1).stream().map(Split::getConnectorSplit).collect(toImmutableSet());
    // node2: split 0
    Collection<Object> node2Splits = splitPlacementResult.getAssignments().get(node2).stream().map(Split::getConnectorSplit).collect(toImmutableSet());
    // node3: split 2
    Collection<Object> node3Splits = splitPlacementResult.getAssignments().get(node3).stream().map(Split::getConnectorSplit).collect(toImmutableSet());

    // Scheduling the same splits on the same set of nodes should give the same assignment
    nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 3);
    splitPlacementResult = nodeSelector.computeAssignments(splits, ImmutableList.of());
    assertEquals(splitPlacementResult.getAssignments().get(node1).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node1Splits);
    assertEquals(splitPlacementResult.getAssignments().get(node2).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node2Splits);
    assertEquals(splitPlacementResult.getAssignments().get(node3).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node3Splits);

    // Adding node4. node4 is hashed in between node3 and node1, so all splits previously assigned to node1
    // are now assigned to node4. Assignments to node2 and node3 should not change.
    nodeManager.addNode(CONNECTOR_ID, node4);
    nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 3);
    splitPlacementResult = nodeSelector.computeAssignments(splits, ImmutableList.of());
    assertEquals(splitPlacementResult.getAssignments().keySet().size(), 3);
    assertEquals(splitPlacementResult.getAssignments().get(node1), ImmutableList.of());
    assertEquals(splitPlacementResult.getAssignments().get(node2).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node2Splits);
    assertEquals(splitPlacementResult.getAssignments().get(node3).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node3Splits);
    assertEquals(splitPlacementResult.getAssignments().get(node4).stream().map(Split::getConnectorSplit).collect(toImmutableSet()), node1Splits);
}
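The key property exercised by the last four assertions is the stability of a consistent-hash ring: when node4 joins, only the keys that hash into the arc node4 takes over (previously owned by node1) move, and every other assignment stays put. A minimal, self-contained sketch of that idea follows; the single virtual node per member and the use of String.hashCode are simplifications for illustration, not how the CONSISTENT_HASHING node provider is actually implemented:

// Minimal consistent-hash ring: each node owns the arc of hash values up to its
// position, and a lookup walks clockwise to the next node on the ring. Adding a
// node takes over only part of one arc, so other keys keep their previous owner.
import java.util.Map;
import java.util.TreeMap;

class ConsistentHashRing
{
    private final TreeMap<Integer, String> ring = new TreeMap<>();

    void addNode(String nodeIdentifier)
    {
        ring.put(nodeIdentifier.hashCode(), nodeIdentifier);
    }

    String nodeFor(String splitKey)
    {
        // Assumes at least one node has been added; wrap around past the largest position
        Map.Entry<Integer, String> entry = ring.ceilingEntry(splitKey.hashCode());
        return (entry != null ? entry : ring.firstEntry()).getValue();
    }
}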
Use of com.facebook.presto.execution.scheduler.SplitPlacementResult in project presto by prestodb.
In class TestNodeScheduler, the method testMaxTasksPerStageAddingNewNodes:
@Test
public void testMaxTasksPerStageAddingNewNodes()
{
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig()
            .setMaxSplitsPerNode(20)
            .setIncludeCoordinator(false)
            .setMaxPendingSplitsPerTask(10);
    LegacyNetworkTopology networkTopology = new LegacyNetworkTopology();
    // refresh interval is 1 nanosecond
    NodeScheduler nodeScheduler = new NodeScheduler(new NetworkLocationCache(networkTopology), networkTopology, nodeManager, new NodeSelectionStats(), nodeSchedulerConfig, nodeTaskMap, Duration.valueOf("0s"), new ThrowingNodeTtlFetcherManager(), new NoOpQueryManager(), new SimpleTtlNodeSelectorConfig());
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 2);

    Set<Split> splits = new HashSet<>();
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote()));
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote()));
    splits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote()));

    nodeManager.addNode(CONNECTOR_ID, ImmutableList.of(new InternalNode("node1", URI.create("http://127.0.0.1:11"), NodeVersion.UNKNOWN, false)));
    SplitPlacementResult splitPlacementResult = nodeSelector.computeAssignments(splits, ImmutableList.of());
    Set<InternalNode> internalNodes = splitPlacementResult.getAssignments().keySet();
    assertEquals(internalNodes.size(), 1);

    nodeManager.addNode(CONNECTOR_ID, ImmutableList.of(new InternalNode("node2", URI.create("http://127.0.0.1:12"), NodeVersion.UNKNOWN, false)));
    splitPlacementResult = nodeSelector.computeAssignments(splits, getRemoteTableScanTask(splitPlacementResult));
    Set<InternalNode> internalNodesSecondCall = splitPlacementResult.getAssignments().keySet();
    assertEquals(internalNodesSecondCall.size(), 2);
    assertTrue(internalNodesSecondCall.containsAll(internalNodes));

    nodeManager.addNode(CONNECTOR_ID, ImmutableList.of(new InternalNode("node2", URI.create("http://127.0.0.1:13"), NodeVersion.UNKNOWN, false)));
    internalNodes = splitPlacementResult.getAssignments().keySet();
    assertEquals(internalNodes.size(), 2);
    assertTrue(internalNodesSecondCall.containsAll(internalNodes));
}
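The retention behavior above, where the stage keeps the nodes it already uses and never grows past the task limit of 2 even as new workers register, follows from capping the eligible node set per stage. A rough sketch of that kind of limit (an illustration of the idea only, not the actual getEligibleNodes implementation used by SimpleNodeSelector):

// Illustrative only: prefer nodes that already host tasks for this stage, then top up
// with other active nodes until maxTasksPerStage is reached.
static List<InternalNode> limitNodesPerStage(List<InternalNode> activeNodes, List<InternalNode> existingTaskNodes, int maxTasksPerStage)
{
    LinkedHashSet<InternalNode> eligible = new LinkedHashSet<>(existingTaskNodes);
    for (InternalNode node : activeNodes) {
        if (eligible.size() >= maxTasksPerStage) {
            break;
        }
        eligible.add(node);
    }
    return ImmutableList.copyOf(eligible);
}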
Use of com.facebook.presto.execution.scheduler.SplitPlacementResult in project presto by prestodb.
In class TestNodeScheduler, the method testMoreSplitsAssignedWhenSplitsWeightsAreSmall:
@Test
public void testMoreSplitsAssignedWhenSplitsWeightsAreSmall()
{
    int standardSplitsPerNode = nodeSchedulerConfig.getMaxSplitsPerNode();
    int standardPendingSplitsPerTask = nodeSchedulerConfig.getMaxPendingSplitsPerTask();
    int fullyLoadedStandardSplitCount = standardSplitsPerNode + standardPendingSplitsPerTask;
    long weightLimitPerNode = SplitWeight.rawValueForStandardSplitCount(standardSplitsPerNode);
    long weightLimitPendingPerTask = SplitWeight.rawValueForStandardSplitCount(standardPendingSplitsPerTask);
    long fullyLoadedStandardSplitWeight = weightLimitPerNode + weightLimitPendingPerTask;

    // Single worker node
    nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID, 1);
    InternalNode workerNode = nodeSelector.selectRandomNodes(1).get(0);
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    TaskId taskId = new TaskId("test", 1, 0, 1);
    MockRemoteTaskFactory.MockRemoteTask task = remoteTaskFactory.createTableScanTask(taskId, workerNode, ImmutableList.of(), nodeTaskMap.createTaskStatsTracker(workerNode, taskId));

    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    ImmutableSet.Builder<Split> splitsBuilder = ImmutableSet.builderWithExpectedSize(fullyLoadedStandardSplitCount * 2);
    // Create 2x more splits than the standard split count limit, at 1/2 the standard weight
    SplitWeight halfWeight = SplitWeight.fromProportion(0.5);
    for (int i = 0; i < fullyLoadedStandardSplitCount * 2; i++) {
        splitsBuilder.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(halfWeight)));
    }
    Set<Split> splits = splitsBuilder.build();
    // Verify we arrived at the exact weight limit
    assertEquals(SplitWeight.rawValueSum(splits, Split::getSplitWeight), fullyLoadedStandardSplitWeight);

    // Node assignment limit met
    SplitPlacementResult result = nodeSelector.computeAssignments(splits, ImmutableList.of(task));
    assertEquals(result.getAssignments().get(workerNode).size(), standardSplitsPerNode * 2);
    assertEquals(SplitWeight.rawValueSum(result.getAssignments().get(workerNode), Split::getSplitWeight), weightLimitPerNode);

    // Mark all splits as running
    task.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder()
            .putAll(new PlanNodeId("sourceId"), result.getAssignments().get(workerNode))
            .build());
    task.startSplits(result.getAssignments().get(workerNode).size());

    // Per task pending splits limit met
    Set<Split> remainingSplits = Sets.difference(splits, ImmutableSet.copyOf(result.getAssignments().get(workerNode)));
    SplitPlacementResult secondResults = nodeSelector.computeAssignments(remainingSplits, ImmutableList.of(task));
    assertEquals(secondResults.getAssignments().get(workerNode).size(), standardPendingSplitsPerTask * 2);
    assertEquals(SplitWeight.rawValueSum(secondResults.getAssignments().get(workerNode), Split::getSplitWeight), weightLimitPendingPerTask);
    task.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder()
            .putAll(new PlanNodeId("sourceId"), secondResults.getAssignments().get(workerNode))
            .build());

    // 2x fully loaded standard count, full weight limit reached
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(workerNode),
            PartitionedSplitsInfo.forSplitCountAndWeightSum(fullyLoadedStandardSplitCount * 2, fullyLoadedStandardSplitWeight));

    // No more splits assigned when full
    SplitPlacementResult resultWhenFull = nodeSelector.computeAssignments(ImmutableSet.of(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote())), ImmutableList.of(task));
    assertTrue(resultWhenFull.getAssignments().isEmpty());
}
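The doubled split counts in the assertions are a direct consequence of the weight arithmetic: the limits are raw weight budgets, so splits that carry half a standard unit fit twice as many times into the same budget. A small illustration, assuming the per-node limit of 20 standard splits configured in this test class and assuming SplitWeight exposes its raw value via getRawValue():

// Illustrative arithmetic only; assumes SplitWeight#getRawValue() returns the same raw
// weight that rawValueSum and rawValueForStandardSplitCount operate on above.
long weightLimitPerNode = SplitWeight.rawValueForStandardSplitCount(20); // node-level weight budget
long halfSplitRawWeight = SplitWeight.fromProportion(0.5).getRawValue(); // each test split is half a standard unit
long halfWeightSplitsThatFit = weightLimitPerNode / halfSplitRawWeight;  // 40, i.e. 2x the standard per-node split count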