Use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.
The class TestSourcePartitionedScheduler, method testScheduleSlowSplitSource.
@Test
public void testScheduleSlowSplitSource()
{
    QueuedSplitSource queuedSplitSource = new QueuedSplitSource(TestingSplit::createRemoteSplit);
    StageExecutionPlan plan = createPlan(queuedSplitSource);
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1);

    // schedule with no splits - will block
    ScheduleResult scheduleResult = scheduler.schedule();
    assertFalse(scheduleResult.isFinished());
    assertFalse(scheduleResult.getBlocked().isDone());
    assertEquals(scheduleResult.getNewTasks().size(), 0);
    assertEquals(stage.getAllTasks().size(), 0);

    queuedSplitSource.addSplits(1);
    assertTrue(scheduleResult.getBlocked().isDone());
}
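The assertion flow above hinges on schedule() handing back a future that completes only once the split source produces work. A minimal standalone sketch of that blocking pattern, built on Guava's SettableFuture (BlockingSplitQueue and its methods are illustrative names, not the hetu-core QueuedSplitSource API):

import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;

import java.util.ArrayDeque;
import java.util.Queue;

// Illustrative stand-in for a slow split source: a scheduler holds on to
// whenHasSplits() and re-runs once addSplit(...) publishes work.
class BlockingSplitQueue<T>
{
    private final Queue<T> splits = new ArrayDeque<>();
    private final SettableFuture<Void> notEmpty = SettableFuture.create();

    public synchronized ListenableFuture<?> whenHasSplits()
    {
        return notEmpty;
    }

    public synchronized void addSplit(T split)
    {
        splits.add(split);
        notEmpty.set(null); // completes the future; isDone() now returns true
    }

    public synchronized T pollSplit()
    {
        return splits.poll();
    }
}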
Use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.
The class SimpleNodeSelector, method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage)
{
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMapSlice, ImmutableSet.of());
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();
    // Check whether the current stage has a TableScanNode that is reading the table for the 2nd time or beyond
    if (stage.isPresent() && stage.get().getStateMachine().getConsumerScanNode() != null) {
        try {
            // if the node exists, get the TableScanNode marked as the consumer
            TableScanNode consumer = stage.get().getStateMachine().getConsumerScanNode();
            // all tables that are part of this stage
            Map<PlanNodeId, TableInfo> tables = stage.get().getStageInfo().getTables();
            QualifiedObjectName tableName;
            for (Map.Entry<PlanNodeId, TableInfo> entry : tables.entrySet()) {
                tableName = entry.getValue().getTableName();
                if (tableSplitAssignmentInfo.getReuseTableScanMappingIdSplitAssignmentMap().containsKey(consumer.getReuseTableScanMappingId())) {
                    // compare split keys using equals and assign nodes accordingly
                    HashMap<SplitKey, InternalNode> splitKeyNodeAssignment = tableSplitAssignmentInfo.getSplitKeyNodeAssignment(consumer.getReuseTableScanMappingId());
                    Set<SplitKey> splitKeySet = splitKeyNodeAssignment.keySet();
                    assignment.putAll(createConsumerScanNodeAssignment(tableName, splits, splitKeySet, splitKeyNodeAssignment));
                    for (Map.Entry<InternalNode, Split> nodeAssignmentEntry : assignment.entries()) {
                        InternalNode node = nodeAssignmentEntry.getKey();
                        assignmentStats.addAssignedSplit(node);
                    }
                }
            }
            log.debug("Consumer:: Assignment size is " + assignment.size() + ", Assignment is " + assignment + ", Assignment Stats is " + assignmentStats);
        }
        catch (NotImplementedException e) {
            log.error("Not a Hive Split! Other connector splits are not supported currently. Error: " + e);
            throw new UnsupportedOperationException("Not a Hive Split! Other connector splits are not supported currently. Error: " + e);
        }
    }
    else {
        // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
        if (optimizedLocalScheduling) {
            // this branch is not reached in the consumer case
            for (Split split : splits) {
                if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                    List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
                    Optional<InternalNode> chosenNode = candidateNodes.stream()
                            .filter(ownerNode -> assignmentStats.getTotalSplitCount(ownerNode) < maxSplitsPerNode)
                            .min(comparingInt(assignmentStats::getTotalSplitCount));
                    if (chosenNode.isPresent()) {
                        assignment.put(chosenNode.get(), split);
                        assignmentStats.addAssignedSplit(chosenNode.get());
                        splitsToBeRedistributed = true;
                        continue;
                    }
                }
                remainingSplits.add(split);
            }
        }
        else {
            remainingSplits = splits;
        }
        for (Split split : remainingSplits) {
            randomCandidates.reset();
            List<InternalNode> candidateNodes;
            if (!split.isRemotelyAccessible()) {
                candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
            }
            else {
                candidateNodes = selectNodes(minCandidates, randomCandidates);
            }
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }

            InternalNode chosenNode = null;
            int min = Integer.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                int totalSplitCount = assignmentStats.getTotalSplitCount(node);
                if (totalSplitCount < min && totalSplitCount < maxSplitsPerNode) {
                    chosenNode = node;
                    min = totalSplitCount;
                }
            }
            if (chosenNode == null) {
                // min is guaranteed to be MAX_VALUE at this line
                for (InternalNode node : candidateNodes) {
                    int totalSplitCount = assignmentStats.getQueuedSplitCountForStage(node);
                    if (totalSplitCount < min && totalSplitCount < maxPendingSplitsPerTask) {
                        chosenNode = node;
                        min = totalSplitCount;
                    }
                }
            }
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode);
            }
            else if (split.isRemotelyAccessible()) {
                splitWaitingForAnyNode = true;
            }
            else if (!splitWaitingForAnyNode) {
                // the exact node set won't matter if a split is already waiting for any node
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }
    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }

    if (!stage.isPresent() || stage.get().getStateMachine().getConsumerScanNode() == null) {
        if (splitsToBeRedistributed) {
            // skipped for the consumer case
            equateDistribution(assignment, assignmentStats, nodeMapSlice);
        }
    }

    // Check whether the current stage has a TableScanNode that is reading the table for the 1st time
    if (stage.isPresent() && stage.get().getStateMachine().getProducerScanNode() != null) {
        // if the node exists, get the TableScanNode and annotate it as the producer
        saveProducerScanNodeAssignment(stage, assignment, assignmentStats);
    }

    // Check whether this is a CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(blocked, assignment);
}
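The selection loop above encodes a two-pass rule: prefer the candidate with the fewest total splits under maxSplitsPerNode, and only if every candidate is at that cap, fall back to the fewest queued splits for the stage under maxPendingSplitsPerTask. A self-contained sketch of that rule, with the assignmentStats lookups abstracted as ToIntFunction parameters (all names here are illustrative):

import java.util.List;
import java.util.Optional;
import java.util.function.ToIntFunction;

final class LeastLoadedSelector
{
    // Mirrors the two passes above: total split count capped by maxSplitsPerNode,
    // then queued split count capped by maxPendingSplitsPerTask.
    static <N> Optional<N> choose(
            List<N> candidates,
            ToIntFunction<N> totalSplits, int maxSplitsPerNode,
            ToIntFunction<N> queuedSplits, int maxPendingSplitsPerTask)
    {
        Optional<N> chosen = minUnderCap(candidates, totalSplits, maxSplitsPerNode);
        if (chosen.isPresent()) {
            return chosen;
        }
        return minUnderCap(candidates, queuedSplits, maxPendingSplitsPerTask);
    }

    // Returns the candidate with the smallest load that is still below the cap, if any.
    private static <N> Optional<N> minUnderCap(List<N> candidates, ToIntFunction<N> load, int cap)
    {
        N best = null;
        int min = Integer.MAX_VALUE;
        for (N node : candidates) {
            int count = load.applyAsInt(node);
            if (count < min && count < cap) {
                best = node;
                min = count;
            }
        }
        return Optional.ofNullable(best);
    }
}

A caller would pass assignmentStats::getTotalSplitCount and assignmentStats::getQueuedSplitCountForStage as the two lookups; an empty result corresponds to the chosenNode == null outcome that marks the split as blocked.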
Use of io.prestosql.execution.NodeTaskMap in project hetu-core by openlookeng.
The class SplitCacheAwareNodeSelector, method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage)
{
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    Map<CatalogName, Map<String, InternalNode>> activeNodesByCatalog = new HashMap<>();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    Set<Split> uncacheableSplits = new HashSet<>();
    Set<Split> newCacheableSplits = new HashSet<>();
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    for (Split split : splits) {
        Optional<String> assignedNodeId = Optional.empty();
        SplitKey splitKey = createSplitKey(split);
        if (splitKey != null) {
            assignedNodeId = splitCacheMap.getCachedNodeId(splitKey);
        }
        if (!split.getConnectorSplit().isCacheable() || splitKey == null) {
            // uncacheable splits will be scheduled using the default node selector
            uncacheableSplits.add(split);
            continue;
        }
        Map<String, InternalNode> activeNodes = activeNodesByCatalog.computeIfAbsent(
                split.getCatalogName(),
                catalogName -> nodeManager.getActiveConnectorNodes(catalogName).stream()
                        .collect(Collectors.toMap(InternalNode::getNodeIdentifier, Function.identity())));
        InternalNode assignedNode = assignedNodeId.map(activeNodes::get).orElse(null);
        // check if a node has been assigned and ensure it is still active before scheduling
        if (assignedNode != null) {
            // the split was previously assigned to a node; assign it to the same node as before
            assignment.put(assignedNode, split);
            assignmentStats.addAssignedSplit(assignedNode);
        }
        else {
            // splits that have not been previously cached, or whose assigned node is now inactive
            newCacheableSplits.add(split);
        }
    }
    log.info("%d out of %d splits already cached. %d new splits to be cached. %d splits cannot be cached.",
            assignment.size(), splits.size(), newCacheableSplits.size(), uncacheableSplits.size());

    Set<Split> unassignedSplits = new HashSet<>();
    unassignedSplits.addAll(newCacheableSplits);
    unassignedSplits.addAll(uncacheableSplits);

    // Compute assignments for splits that cannot be cached, are newly cacheable, or were cached on a worker that is now inactive
    SplitPlacementResult defaultSplitPlacementResult = defaultNodeSelector.computeAssignments(unassignedSplits, existingTasks, stage);
    defaultSplitPlacementResult.getAssignments().forEach((internalNode, split) -> {
        // set or update the cached node id only if the split is cacheable
        if (newCacheableSplits.contains(split)) {
            SplitKey splitKey = createSplitKey(split);
            if (splitKey != null) {
                splitCacheMap.addCachedNode(splitKey, internalNode.getNodeIdentifier());
            }
        }
        assignmentStats.addAssignedSplit(internalNode);
    });
    assignment.putAll(defaultSplitPlacementResult.getAssignments());

    // Check whether this is a CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(defaultSplitPlacementResult.getBlocked(), assignment);
}
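The selector above is a cache-affinity scheme: a cacheable split is routed back to the node that cached it if that node is still active, and everything else is delegated to the default selector, whose choice is then recorded. A stripped-down sketch of that routing decision, with the SplitCacheMap state modeled as a plain in-memory map (all names here are illustrative, not the hetu-core API):

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;

// Minimal cache-affinity router: remembers which node cached a key and reuses it
// while that node is still active; otherwise it delegates and records the new owner.
final class CacheAffinityRouter<K, N>
{
    private final Map<K, N> cachedNode = new HashMap<>();

    public N route(K key, Set<N> activeNodes, Supplier<N> defaultSelector)
    {
        N node = cachedNode.get(key);
        if (node != null && activeNodes.contains(node)) {
            // the split was cached on this node before; keep the affinity
            return node;
        }
        N chosen = defaultSelector.get();
        cachedNode.put(key, chosen); // remember the owner for the next scheduling round
        return chosen;
    }
}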
Use of io.prestosql.execution.NodeTaskMap in project boostkit-bigdata by kunpengcompute.
The class TestHiveIntegrationSmokeTest, method testRuseExchangeGroupSplitsMatchingBetweenProducerConsumer.
@Test
public void testRuseExchangeGroupSplitsMatchingBetweenProducerConsumer()
{
    setUpNodes();
    NodeTaskMap nodeTasks = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    UUID uuid = UUID.randomUUID();
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(
            testFragmentProducer,
            ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))),
            ImmutableList.of(),
            ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(
            stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId),
            producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(),
            new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor),
            TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(),
            new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))),
            new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Set<Split> splits = createAndGetSplits(10);
    Multimap<InternalNode, Split> producerAssignment = nodeSelector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.of(producerStage)).getAssignments();

    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(
            testFragmentConsumer,
            ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))),
            ImmutableList.of(),
            ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(
            stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId),
            consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(),
            new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor),
            TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(),
            new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))),
            new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Multimap<InternalNode, Split> consumerAssignment = nodeSelector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.of(stage)).getAssignments();

    // the consumer must receive exactly the same split-to-node assignment as the producer
    assertEquals(consumerAssignment.size(), producerAssignment.size());
    for (InternalNode node : consumerAssignment.keySet()) {
        List<Split> producerSplitList = new ArrayList<>(producerAssignment.get(node));
        List<Split> consumerSplitList = new ArrayList<>(consumerAssignment.get(node));
        // each assigned split wraps a group of splits; unwrap both sides before comparing keys
        List<Split> producerGroupedSplits = producerSplitList.get(0).getSplits();
        List<Split> consumerGroupedSplits = consumerSplitList.get(0).getSplits();
        assertEquals(consumerGroupedSplits.size(), producerGroupedSplits.size());
        for (int i = 0; i < producerGroupedSplits.size(); i++) {
            SplitKey producerKey = new SplitKey(producerGroupedSplits.get(i), TEST_CATALOG, TEST_SCHEMA, TEST_TABLE);
            SplitKey consumerKey = new SplitKey(consumerGroupedSplits.get(i), TEST_CATALOG, TEST_SCHEMA, TEST_TABLE);
            assertEquals(consumerKey, producerKey);
        }
    }
}
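The per-node comparison loop above can be read as one invariant: for every node, the producer and consumer stages received the same split group. A hypothetical helper capturing that invariant, assuming the test's existing imports and that SplitKey implements hashCode consistently with its equals:

// Hypothetical helper, not part of the test class: assert that two stage
// assignments hold the same split group on every node.
private static void assertSameAssignments(Multimap<InternalNode, Split> producer, Multimap<InternalNode, Split> consumer)
{
    assertEquals(consumer.keySet(), producer.keySet());
    for (InternalNode node : producer.keySet()) {
        assertEquals(toSplitKeys(consumer.get(node)), toSplitKeys(producer.get(node)));
    }
}

private static Set<SplitKey> toSplitKeys(Collection<Split> assigned)
{
    Set<SplitKey> keys = new HashSet<>();
    for (Split wrapper : assigned) {
        for (Split split : wrapper.getSplits()) {
            keys.add(new SplitKey(split, TEST_CATALOG, TEST_SCHEMA, TEST_TABLE));
        }
    }
    return keys;
}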
Use of io.prestosql.execution.NodeTaskMap in project boostkit-bigdata by kunpengcompute.
The class TestHiveIntegrationSmokeTest, method testRuseExchangeSplitsGroupNotMatchingBetweenProducerConsumer.
@Test
public void testRuseExchangeSplitsGroupNotMatchingBetweenProducerConsumer()
{
    setUpNodes();
    NodeTaskMap nodeTasks = new NodeTaskMap(new FinalizerService());
    StageId stageId = new StageId(new QueryId("query"), 0);
    UUID uuid = UUID.randomUUID();
    PlanFragment testFragmentProducer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_PRODUCER, uuid, 1);
    PlanNodeId tableScanNodeId = new PlanNodeId("plan_id");
    StageExecutionPlan producerStageExecutionPlan = new StageExecutionPlan(
            testFragmentProducer,
            ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))),
            ImmutableList.of(),
            ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution producerStage = createSqlStageExecution(
            stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId),
            producerStageExecutionPlan.getFragment(), producerStageExecutionPlan.getTables(),
            new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor),
            TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(),
            new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))),
            new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));
    Set<Split> producerSplits = createAndGetSplits(10);
    nodeSelector.computeAssignments(producerSplits, ImmutableList.copyOf(taskMap.values()), Optional.of(producerStage)).getAssignments();

    PlanFragment testFragmentConsumer = createTableScanPlanFragment("build", ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_CONSUMER, uuid, 1);
    StageExecutionPlan consumerStageExecutionPlan = new StageExecutionPlan(
            testFragmentConsumer,
            ImmutableMap.of(tableScanNodeId, new ConnectorAwareSplitSource(CONNECTOR_ID, createFixedSplitSource(0, TestingSplit::createRemoteSplit))),
            ImmutableList.of(),
            ImmutableMap.of(tableScanNodeId, new TableInfo(new QualifiedObjectName("test", TEST_SCHEMA, "test"), TupleDomain.all())));
    SqlStageExecution stage = createSqlStageExecution(
            stageId, new TestSqlTaskManager.MockLocationFactory().createStageLocation(stageId),
            consumerStageExecutionPlan.getFragment(), consumerStageExecutionPlan.getTables(),
            new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor),
            TEST_SESSION_REUSE, true, nodeTasks, remoteTaskExecutor, new NoOpFailureDetector(), new SplitSchedulerStats(),
            new DynamicFilterService(new LocalStateStoreProvider(new SeedStoreManager(new FileSystemClientManager()))),
            new QuerySnapshotManager(stageId.getQueryId(), NOOP_SNAPSHOT_UTILS, TEST_SESSION));

    // the consumer presents a different split group (50 splits vs the producer's 10), so assignment must fail
    Set<Split> consumerSplits = createAndGetSplits(50);
    try {
        nodeSelector.computeAssignments(consumerSplits, ImmutableList.copyOf(taskMap.values()), Optional.of(stage)).getAssignments();
    }
    catch (PrestoException e) {
        assertEquals(e.getMessage(), "Producer & consumer splits are not same");
        return;
    }
    fail("expected PrestoException because producer and consumer split groups differ");
}
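If the project's TestNG version is 6.9.5 or later, the try/catch/fail shape above can be tightened with org.testng.Assert.expectThrows; a sketch under that assumption, using the same fields as the test:

PrestoException e = expectThrows(PrestoException.class,
        () -> nodeSelector.computeAssignments(consumerSplits, ImmutableList.copyOf(taskMap.values()), Optional.of(stage)));
assertEquals(e.getMessage(), "Producer & consumer splits are not same");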