Use of io.prestosql.execution.SplitKey in project hetu-core by openlookeng.
Class SimpleNodeSelector, method computeAssignments.
/**
 * Computes which node each of the given splits should be placed on.
 *
 * <p>When the stage's state machine exposes a consumer scan node, the splits are placed on the
 * nodes previously recorded for the matching producer split keys. Otherwise the splits go
 * through an optional locality-first pass (when {@code optimizedLocalScheduling} is set) and
 * then through least-loaded selection among candidate nodes.
 *
 * @param splits splits to place
 * @param existingTasks tasks already running; used to seed the assignment statistics and to build the blocked future
 * @param stage the stage being scheduled, if any
 * @return the node-to-split assignment together with the future produced by {@code toWhenHasSplitQueueSpaceFuture}
 * @throws PrestoException with {@code NO_NODES_AVAILABLE} when a split has no candidate node
 * @throws UnsupportedOperationException when building the consumer assignment raises {@code NotImplementedException}
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMapSlice, ImmutableSet.of());
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();

    // Check if the current stage has a TableScanNode which is reading the table for the 2nd time or beyond
    if (stage.isPresent() && stage.get().getStateMachine().getConsumerScanNode() != null) {
        try {
            // if node exists, get the TableScanNode and treat it as the consumer
            TableScanNode consumer = stage.get().getStateMachine().getConsumerScanNode();
            // all tables part of this stage
            Map<PlanNodeId, TableInfo> tables = stage.get().getStageInfo().getTables();
            for (Map.Entry<PlanNodeId, TableInfo> entry : tables.entrySet()) {
                QualifiedObjectName tableName = entry.getValue().getTableName();
                if (tableSplitAssignmentInfo.getReuseTableScanMappingIdSplitAssignmentMap().containsKey(consumer.getReuseTableScanMappingId())) {
                    // compare splitkey using equals and then assign nodes accordingly.
                    HashMap<SplitKey, InternalNode> splitKeyNodeAssignment = tableSplitAssignmentInfo.getSplitKeyNodeAssignment(consumer.getReuseTableScanMappingId());
                    Set<SplitKey> splitKeySet = splitKeyNodeAssignment.keySet();
                    Multimap<InternalNode, Split> consumerAssignment = createConsumerScanNodeAssignment(tableName, splits, splitKeySet, splitKeyNodeAssignment);
                    // Count a split only when it is newly added to the assignment. Walking the whole
                    // accumulated assignment here would re-count earlier entries on every table iteration.
                    for (Map.Entry<InternalNode, Split> placed : consumerAssignment.entries()) {
                        if (assignment.put(placed.getKey(), placed.getValue())) {
                            assignmentStats.addAssignedSplit(placed.getKey());
                        }
                    }
                }
            }
            // format-style logging so the (potentially large) multimap is only rendered when debug is enabled
            log.debug("Consumer:: Assignment size is %s ,Assignment is %s ,Assignment Stats is %s", assignment.size(), assignment, assignmentStats);
        } catch (NotImplementedException e) {
            log.error("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e);
            // keep the original exception as the cause so the stack trace is not lost
            throw new UnsupportedOperationException("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e, e);
        }
    } else {
        // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
        if (optimizedLocalScheduling) {
            // should not hit for consumer case
            for (Split split : splits) {
                if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                    List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
                    // least-loaded local node that is still below the per-node split limit
                    Optional<InternalNode> chosenNode = candidateNodes.stream()
                            .filter(ownerNode -> assignmentStats.getTotalSplitCount(ownerNode) < maxSplitsPerNode)
                            .min(comparingInt(assignmentStats::getTotalSplitCount));
                    if (chosenNode.isPresent()) {
                        assignment.put(chosenNode.get(), split);
                        // check later
                        assignmentStats.addAssignedSplit(chosenNode.get());
                        splitsToBeRedistributed = true;
                        continue;
                    }
                }
                remainingSplits.add(split);
            }
        } else {
            remainingSplits = splits;
        }

        for (Split split : remainingSplits) {
            randomCandidates.reset();
            List<InternalNode> candidateNodes;
            if (!split.isRemotelyAccessible()) {
                candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
            } else {
                candidateNodes = selectNodes(minCandidates, randomCandidates);
            }
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }

            // first preference: candidate with the fewest total splits, below maxSplitsPerNode
            InternalNode chosenNode = null;
            int min = Integer.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                int totalSplitCount = assignmentStats.getTotalSplitCount(node);
                if (totalSplitCount < min && totalSplitCount < maxSplitsPerNode) {
                    chosenNode = node;
                    min = totalSplitCount;
                }
            }
            if (chosenNode == null) {
                // min is guaranteed to be MAX_VALUE at this line
                // second preference: candidate with the fewest queued splits for this stage, below maxPendingSplitsPerTask
                for (InternalNode node : candidateNodes) {
                    int queuedSplitCount = assignmentStats.getQueuedSplitCountForStage(node);
                    if (queuedSplitCount < min && queuedSplitCount < maxPendingSplitsPerTask) {
                        chosenNode = node;
                        min = queuedSplitCount;
                    }
                }
            }

            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode);
            } else if (split.isRemotelyAccessible()) {
                splitWaitingForAnyNode = true;
            } else if (!splitWaitingForAnyNode) {
                // Exact node set won't matter, if a split is waiting for any node
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }

    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }

    if (!stage.isPresent() || stage.get().getStateMachine().getConsumerScanNode() == null) {
        if (splitsToBeRedistributed) {
            // skip for consumer
            equateDistribution(assignment, assignmentStats, nodeMapSlice);
        }
    }

    // Check if the current stage has a TableScanNode which is reading the table for the 1st time
    if (stage.isPresent() && stage.get().getStateMachine().getProducerScanNode() != null) {
        // if node exists, record this assignment for the producer scan node
        saveProducerScanNodeAssignment(stage, assignment, assignmentStats);
    }

    // Check if its CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }

    return new SplitPlacementResult(blocked, assignment);
}
Use of io.prestosql.execution.SplitKey in project hetu-core by openlookeng.
Class SplitCacheAwareNodeSelector, method computeAssignments.
/**
 * Places splits with a preference for the node that previously cached them.
 *
 * <p>Cacheable splits whose cached node id maps to a currently active connector node are sent
 * to that node. All other splits (not cacheable, no split key, never cached, or cached on a node
 * that is no longer active) are delegated to {@code defaultNodeSelector}; for the newly cacheable
 * ones the node chosen by the default selector is recorded in the {@code SplitCacheMap}.
 *
 * @param splits splits to place
 * @param existingTasks tasks already running, forwarded to the default selector
 * @param stage the stage being scheduled, if any
 * @return the combined assignment and the blocked future reported by the default selector
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap currentNodeMap = this.nodeMap.get().get();
    Map<CatalogName, Map<String, InternalNode>> activeNodesByCatalog = new HashMap<>();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, currentNodeMap, existingTasks);
    Set<Split> uncacheableSplits = new HashSet<>();
    Set<Split> newCacheableSplits = new HashSet<>();
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();

    for (Split candidate : splits) {
        SplitKey key = createSplitKey(candidate);
        Optional<String> cachedNodeId = Optional.empty();
        if (key != null) {
            cachedNodeId = splitCacheMap.getCachedNodeId(key);
        }

        boolean cacheable = candidate.getConnectorSplit().isCacheable() && key != null;
        if (!cacheable) {
            // uncacheable splits will be scheduled using default node selector
            uncacheableSplits.add(candidate);
            continue;
        }

        // lazily resolve the active nodes of this split's catalog, keyed by node identifier
        Map<String, InternalNode> activeNodes = activeNodesByCatalog.computeIfAbsent(
                candidate.getCatalogName(),
                catalogName -> nodeManager.getActiveConnectorNodes(catalogName).stream()
                        .collect(Collectors.toMap(InternalNode::getNodeIdentifier, Function.identity())));

        // a cached node only counts if it is still among the active nodes
        InternalNode previousNode = cachedNodeId.isPresent() ? activeNodes.get(cachedNodeId.get()) : null;
        if (previousNode == null) {
            // splits that have not been previously cached or whose assigned node is now inactive
            newCacheableSplits.add(candidate);
            continue;
        }

        // split has been previously assigned to a node: assign it to the same node as before
        assignment.put(previousNode, candidate);
        assignmentStats.addAssignedSplit(previousNode);
    }

    log.info("%d out of %d splits already cached. %d new splits to be cached. %d splits cannot be cached.", assignment.size(), splits.size(), newCacheableSplits.size(), uncacheableSplits.size());

    // Splits that cannot be cached, are newly cacheable, or were cached on a worker that is inactive now
    // are all handed to the default selector.
    Set<Split> unassignedSplits = new HashSet<>(newCacheableSplits);
    unassignedSplits.addAll(uncacheableSplits);
    SplitPlacementResult defaultPlacement = defaultNodeSelector.computeAssignments(unassignedSplits, existingTasks, stage);

    for (Map.Entry<InternalNode, Split> placed : defaultPlacement.getAssignments().entries()) {
        InternalNode targetNode = placed.getKey();
        Split placedSplit = placed.getValue();
        // Set or update cached node id only if split is cacheable
        if (newCacheableSplits.contains(placedSplit)) {
            SplitKey placedKey = createSplitKey(placedSplit);
            if (placedKey != null) {
                splitCacheMap.addCachedNode(placedKey, targetNode.getNodeIdentifier());
            }
        }
        assignmentStats.addAssignedSplit(targetNode);
    }
    assignment.putAll(defaultPlacement.getAssignments());

    // Check if its CTE node and its feeder
    boolean feedsCte = stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent();
    if (feedsCte) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }

    return new SplitPlacementResult(defaultPlacement.getBlocked(), assignment);
}
Use of io.prestosql.execution.SplitKey in project hetu-core by openlookeng.
Class SplitCacheAwareNodeSelector, method createSplitKey.
/**
 * Builds the {@link SplitKey} for a split from the connector split's info map.
 *
 * <p>The schema is read from the {@code "database"} entry, falling back to {@code "schema"};
 * the table name is read from the {@code "table"} entry.
 *
 * @param split the split to build a key for
 * @return the split key, or {@code null} when the split info is not a {@code Map} or does not
 *         provide both a schema and a table name
 */
private SplitKey createSplitKey(Split split) {
    Object splitInfo = split.getConnectorSplit().getInfo();
    if (!(splitInfo instanceof Map)) {
        return null;
    }

    // The info object is only known to be a Map at this point; key/value types are not checked by the cast.
    @SuppressWarnings("unchecked")
    Map<String, Object> splitInfoMap = (Map<String, Object>) splitInfo;

    String schema = (String) splitInfoMap.getOrDefault("database", splitInfoMap.get("schema"));
    Object table = splitInfoMap.get("table");
    // A missing table entry used to surface as a NullPointerException; report "no key" instead.
    if (schema == null || table == null) {
        return null;
    }
    return new SplitKey(split, split.getCatalogName().getCatalogName(), schema, table.toString());
}
Use of io.prestosql.execution.SplitKey in project hetu-core by openlookeng.
Class TableSplitAssignmentInfo, method setPerTablesplitKeyNodeAssignment.
/**
 * Stores the inverted assignment information [Split-Node mapping] for a given reuseTableScanMappingId.
 * Entries are added to the map already registered for the id, or to a new map if none exists;
 * the map is (re-)registered only after all splits have been processed.
 *
 * @param qualifiedTableName name of the table acting as a producer (reads data from disk for the first time)
 * @param reuseTableScanMappingId unique identifier of the producer-consumer pair for a reused table
 * @param assignmentInformation node-split assignment multimap created as part of the stage that processes this table
 * @throws UnsupportedOperationException when creating a split key raises {@code NotImplementedException}
 * NOTE: Works only with Hive data as other connectors don't support SplitKey currently
 */
private void setPerTablesplitKeyNodeAssignment(QualifiedObjectName qualifiedTableName, UUID reuseTableScanMappingId, Multimap<InternalNode, Split> assignmentInformation) {
    String catalog = qualifiedTableName.getCatalogName();
    String schema = qualifiedTableName.getSchemaName();
    String table = qualifiedTableName.getObjectName();
    try {
        HashMap<SplitKey, InternalNode> splitKeyNodeAssignment = perTableReuseTableScanMappingIdSplitKeyNodeAssignment.get(reuseTableScanMappingId);
        if (splitKeyNodeAssignment == null) {
            splitKeyNodeAssignment = new HashMap<>();
        }
        for (InternalNode node : assignmentInformation.keySet()) {
            for (Split assignedSplit : assignmentInformation.get(node)) {
                if (assignedSplit.getConnectorSplit().getSplitCount() > 1) {
                    // a split reporting more than one sub-split is unwrapped and each sub-split is keyed individually
                    for (Split unwrappedSplit : assignedSplit.getSplits()) {
                        splitKeyNodeAssignment.put(new SplitKey(unwrappedSplit, catalog, schema, table), node);
                    }
                } else {
                    splitKeyNodeAssignment.put(new SplitKey(assignedSplit, catalog, schema, table), node);
                }
            }
        }
        perTableReuseTableScanMappingIdSplitKeyNodeAssignment.put(reuseTableScanMappingId, splitKeyNodeAssignment);
    } catch (NotImplementedException e) {
        log.error("Unsupported split type: " + e);
        // keep the original exception as the cause so the stack trace is not lost
        throw new UnsupportedOperationException("Unsupported split type: " + e, e);
    }
}
Use of io.prestosql.execution.SplitKey in project hetu-core by openlookeng.
Class SimpleNodeSelector, method createConsumerScanNodeAssignment.
/**
 * Builds the node assignment for consumer splits by matching each split's key against the
 * producer split keys and reusing the node recorded for the matching producer key.
 *
 * <p>For a split reporting more than one sub-split, only the first sub-split is used to build
 * the key; the whole split is assigned to the matched node.
 *
 * @param tableName table whose catalog, schema and object name are used to build the split keys
 * @param splits consumer splits to place
 * @param splitKeySet producer split keys to match against
 * @param splitKeyNodeAssignment producer split key to node mapping
 * @return node-to-split assignment covering every split in {@code splits}
 * @throws PrestoException with {@code GENERIC_INTERNAL_ERROR} when a split matches no producer split key
 */
private Multimap<InternalNode, Split> createConsumerScanNodeAssignment(QualifiedObjectName tableName, Set<Split> splits, Set<SplitKey> splitKeySet, HashMap<SplitKey, InternalNode> splitKeyNodeAssignment) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    for (Split split : splits) {
        Split aSplit;
        if (split.getConnectorSplit().getSplitCount() > 1) {
            aSplit = split.getSplits().get(0);
        } else {
            aSplit = split;
        }

        SplitKey splitKey = new SplitKey(aSplit, tableName.getCatalogName(), tableName.getSchemaName(), tableName.getObjectName());
        boolean matched = false;
        // NOTE(review): linear scan per split; a direct splitKeyNodeAssignment lookup would avoid it
        // if SplitKey.hashCode is consistent with equals - confirm before changing.
        for (SplitKey producerSplitKey : splitKeySet) {
            if (splitKey.equals(producerSplitKey)) {
                assignment.put(splitKeyNodeAssignment.get(producerSplitKey), split);
                matched = true;
                break;
            }
        }

        if (!matched) {
            log.debug("split not matched: %s", aSplit);
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Producer & consumer splits are not same");
        }
    }
    return assignment;
}
Aggregations