use of io.prestosql.metadata.Split in project hetu-core by openlookeng.
the class SimpleNodeSelector method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMapSlice, ImmutableSet.of());
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();
    // Check if the current stage has a TableScanNode which is reading the table for the 2nd time or beyond
    if (stage.isPresent() && stage.get().getStateMachine().getConsumerScanNode() != null) {
        try {
            // if the node exists, get the TableScanNode and treat it as the consumer
            TableScanNode consumer = stage.get().getStateMachine().getConsumerScanNode();
            // all tables that are part of this stage
            Map<PlanNodeId, TableInfo> tables = stage.get().getStageInfo().getTables();
            QualifiedObjectName tableName;
            for (Map.Entry<PlanNodeId, TableInfo> entry : tables.entrySet()) {
                tableName = entry.getValue().getTableName();
                if (tableSplitAssignmentInfo.getReuseTableScanMappingIdSplitAssignmentMap().containsKey(consumer.getReuseTableScanMappingId())) {
                    // compare SplitKeys using equals and then assign nodes accordingly
                    HashMap<SplitKey, InternalNode> splitKeyNodeAssignment = tableSplitAssignmentInfo.getSplitKeyNodeAssignment(consumer.getReuseTableScanMappingId());
                    Set<SplitKey> splitKeySet = splitKeyNodeAssignment.keySet();
                    assignment.putAll(createConsumerScanNodeAssignment(tableName, splits, splitKeySet, splitKeyNodeAssignment));
                    for (Map.Entry<InternalNode, Split> nodeAssignmentEntry : assignment.entries()) {
                        InternalNode node = nodeAssignmentEntry.getKey();
                        assignmentStats.addAssignedSplit(node);
                    }
                }
            }
            log.debug("Consumer:: Assignment size is " + assignment.size() + " ,Assignment is " + assignment + " ,Assignment Stats is " + assignmentStats);
        }
        catch (NotImplementedException e) {
            log.error("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e);
            throw new UnsupportedOperationException("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e);
        }
    }
    else {
        // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
        if (optimizedLocalScheduling) {
            // this branch should not be hit in the consumer case
            for (Split split : splits) {
                if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                    List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
                    Optional<InternalNode> chosenNode = candidateNodes.stream()
                            .filter(ownerNode -> assignmentStats.getTotalSplitCount(ownerNode) < maxSplitsPerNode)
                            .min(comparingInt(assignmentStats::getTotalSplitCount));
                    if (chosenNode.isPresent()) {
                        assignment.put(chosenNode.get(), split);
                        // check later
                        assignmentStats.addAssignedSplit(chosenNode.get());
                        splitsToBeRedistributed = true;
                        continue;
                    }
                }
                remainingSplits.add(split);
            }
        }
        else {
            remainingSplits = splits;
        }
        for (Split split : remainingSplits) {
            randomCandidates.reset();
            List<InternalNode> candidateNodes;
            if (!split.isRemotelyAccessible()) {
                candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
            }
            else {
                candidateNodes = selectNodes(minCandidates, randomCandidates);
            }
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = null;
            int min = Integer.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                int totalSplitCount = assignmentStats.getTotalSplitCount(node);
                if (totalSplitCount < min && totalSplitCount < maxSplitsPerNode) {
                    chosenNode = node;
                    min = totalSplitCount;
                }
            }
            if (chosenNode == null) {
                // min is guaranteed to still be Integer.MAX_VALUE at this point
                for (InternalNode node : candidateNodes) {
                    int totalSplitCount = assignmentStats.getQueuedSplitCountForStage(node);
                    if (totalSplitCount < min && totalSplitCount < maxPendingSplitsPerTask) {
                        chosenNode = node;
                        min = totalSplitCount;
                    }
                }
            }
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode);
            }
            else {
                if (split.isRemotelyAccessible()) {
                    splitWaitingForAnyNode = true;
                }
                // Exact node set won't matter, if a split is waiting for any node
                else if (!splitWaitingForAnyNode) {
                    blockedExactNodes.addAll(candidateNodes);
                }
            }
        }
    }
    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }
    else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }
    if (!stage.isPresent() || stage.get().getStateMachine().getConsumerScanNode() == null) {
        if (splitsToBeRedistributed) {
            // skip for the consumer case
            equateDistribution(assignment, assignmentStats, nodeMapSlice);
        }
    }
    // Check if the current stage has a TableScanNode which is reading the table for the 1st time
    if (stage.isPresent() && stage.get().getStateMachine().getProducerScanNode() != null) {
        // if the node exists, get the TableScanNode and annotate it as the producer
        saveProducerScanNodeAssignment(stage, assignment, assignmentStats);
    }
    // Check if it is a CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(blocked, assignment);
}
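The fallback loop above selects a node in two passes: first the least-loaded candidate whose total split count is below maxSplitsPerNode, then, if none qualifies, the candidate with the fewest queued splits below maxPendingSplitsPerTask; if both passes fail, the split waits for queue space. The following is a minimal standalone sketch of that selection order using plain maps in place of NodeAssignmentStats; all names in it are hypothetical and not hetu-core APIs.

import java.util.List;
import java.util.Map;

public class TwoPassNodeChoice {
    /**
     * Mirrors the selection order above: prefer the candidate with the fewest
     * total splits below maxSplitsPerNode; otherwise fall back to the candidate
     * with the fewest queued splits below maxPendingSplitsPerTask. A null result
     * means the split has to wait for queue space.
     */
    static String chooseNode(List<String> candidates,
            Map<String, Integer> totalSplits, Map<String, Integer> queuedSplits,
            int maxSplitsPerNode, int maxPendingSplitsPerTask) {
        String chosen = null;
        int min = Integer.MAX_VALUE;
        for (String node : candidates) {
            int total = totalSplits.getOrDefault(node, 0);
            if (total < min && total < maxSplitsPerNode) {
                chosen = node;
                min = total;
            }
        }
        if (chosen == null) {
            // min is still Integer.MAX_VALUE here, as in the method above
            for (String node : candidates) {
                int queued = queuedSplits.getOrDefault(node, 0);
                if (queued < min && queued < maxPendingSplitsPerTask) {
                    chosen = node;
                    min = queued;
                }
            }
        }
        return chosen;
    }

    public static void main(String[] args) {
        List<String> candidates = List.of("worker1", "worker2", "worker3");
        Map<String, Integer> total = Map.of("worker1", 100, "worker2", 100, "worker3", 100);
        Map<String, Integer> queued = Map.of("worker1", 7, "worker2", 3, "worker3", 5);
        // all nodes are at the total-split cap (100), so the queued-split pass picks worker2
        System.out.println(chooseNode(candidates, total, queued, 100, 10));
    }
}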
use of io.prestosql.metadata.Split in project hetu-core by openlookeng.
the class SplitCacheAwareNodeSelector method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    Map<CatalogName, Map<String, InternalNode>> activeNodesByCatalog = new HashMap<>();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    Set<Split> uncacheableSplits = new HashSet<>();
    Set<Split> newCacheableSplits = new HashSet<>();
    SplitCacheMap splitCacheMap = SplitCacheMap.getInstance();
    for (Split split : splits) {
        Optional<String> assignedNodeId = Optional.empty();
        SplitKey splitKey = createSplitKey(split);
        if (splitKey != null) {
            assignedNodeId = splitCacheMap.getCachedNodeId(splitKey);
        }
        if (!split.getConnectorSplit().isCacheable() || splitKey == null) {
            // uncacheable splits will be scheduled using the default node selector
            uncacheableSplits.add(split);
            continue;
        }
        Map<String, InternalNode> activeNodes = activeNodesByCatalog.computeIfAbsent(split.getCatalogName(),
                catalogName -> nodeManager.getActiveConnectorNodes(catalogName).stream()
                        .collect(Collectors.toMap(InternalNode::getNodeIdentifier, Function.identity())));
        InternalNode assignedNode = assignedNodeId.map(activeNodes::get).orElse(null);
        // check if a node has been assigned and ensure it is still active before scheduling
        if (assignedNode != null) {
            // the split has been previously assigned to a node;
            // assign the split to the same node as before
            assignment.put(assignedNode, split);
            assignmentStats.addAssignedSplit(assignedNode);
        }
        else {
            // splits that have not been previously cached, or whose assigned node is now inactive
            newCacheableSplits.add(split);
        }
    }
    log.info("%d out of %d splits already cached. %d new splits to be cached. %d splits cannot be cached.", assignment.size(), splits.size(), newCacheableSplits.size(), uncacheableSplits.size());
    Set<Split> unassignedSplits = new HashSet<>();
    unassignedSplits.addAll(newCacheableSplits);
    unassignedSplits.addAll(uncacheableSplits);
    // Compute assignments for splits that cannot be cached, are newly cacheable, or were cached on a worker that is now inactive.
    SplitPlacementResult defaultSplitPlacementResult = defaultNodeSelector.computeAssignments(unassignedSplits, existingTasks, stage);
    defaultSplitPlacementResult.getAssignments().forEach((internalNode, split) -> {
        // Set or update the cached node id only if the split is cacheable
        if (newCacheableSplits.contains(split)) {
            SplitKey splitKey = createSplitKey(split);
            if (splitKey != null) {
                splitCacheMap.addCachedNode(splitKey, internalNode.getNodeIdentifier());
            }
        }
        assignmentStats.addAssignedSplit(internalNode);
    });
    assignment.putAll(defaultSplitPlacementResult.getAssignments());
    // Check if it is a CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(defaultSplitPlacementResult.getBlocked(), assignment);
}
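SplitCacheAwareNodeSelector preserves cache affinity by sending a split back to the node that previously cached it, provided that node is still active; otherwise it delegates to the default selector and records the new owner. A simplified, self-contained sketch of that idea, with strings standing in for Split, SplitKey, SplitCacheMap, and InternalNode (all names hypothetical, and a round-robin stand-in for the default selector):

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class CacheAffinityExample {
    // cache affinity table: split key -> node id that cached it earlier (hypothetical stand-in for SplitCacheMap)
    static final Map<String, String> cachedNodeBySplit = new HashMap<>();

    /**
     * Reuses the caching node when it is still active; otherwise assigns the
     * split via a stand-in default selector and remembers the new owner.
     */
    static Map<String, String> assign(List<String> splitKeys, Set<String> activeNodes, List<String> nodesRoundRobin) {
        Map<String, String> assignment = new LinkedHashMap<>();
        int next = 0;
        for (String splitKey : splitKeys) {
            String cachedNode = cachedNodeBySplit.get(splitKey);
            if (cachedNode != null && activeNodes.contains(cachedNode)) {
                assignment.put(splitKey, cachedNode);                               // cache hit: keep locality
            }
            else {
                String node = nodesRoundRobin.get(next++ % nodesRoundRobin.size()); // stand-in for the default selector
                assignment.put(splitKey, node);
                cachedNodeBySplit.put(splitKey, node);                              // remember the new caching node
            }
        }
        return assignment;
    }

    public static void main(String[] args) {
        cachedNodeBySplit.put("orders/part-0", "worker2");
        List<String> splits = List.of("orders/part-0", "orders/part-1");
        // part-0 goes back to worker2; part-1 is new and falls through to the default assignment
        System.out.println(assign(splits, Set.of("worker1", "worker2"), List.of("worker1", "worker2")));
    }
}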
use of io.prestosql.metadata.Split in project hetu-core by openlookeng.
the class TableSplitAssignmentInfo method setPerTablesplitKeyNodeAssignment.
/**
 * Store the inverted assignment information [Split-Node mapping] for a given reuseTableScanMappingId number
 * @param qualifiedTableName name of the table that acts as the producer (reads data from disk for the first time)
 * @param reuseTableScanMappingId unique identifier for the producer-consumer pair of a reused table
 * @param assignmentInformation node-split assignment multimap created as part of the stage that processes this table
 * NOTE: Works only with Hive data as other connectors don't support SplitKey currently
 */
private void setPerTablesplitKeyNodeAssignment(QualifiedObjectName qualifiedTableName, UUID reuseTableScanMappingId, Multimap<InternalNode, Split> assignmentInformation) {
    String catalog = qualifiedTableName.getCatalogName();
    String schema = qualifiedTableName.getSchemaName();
    String table = qualifiedTableName.getObjectName();
    HashMap<SplitKey, InternalNode> splitKeyNodeAssignment;
    try {
        splitKeyNodeAssignment = perTableReuseTableScanMappingIdSplitKeyNodeAssignment.get(reuseTableScanMappingId);
        if (splitKeyNodeAssignment == null) {
            splitKeyNodeAssignment = new HashMap<>();
        }
        for (InternalNode node : assignmentInformation.keySet()) {
            Collection<Split> assignmentSplits = assignmentInformation.get(node);
            for (Split assignmentSplit : assignmentSplits) {
                if (assignmentSplit.getConnectorSplit().getSplitCount() > 1) {
                    // a wrapped split: record each contained split individually
                    for (Split unwrappedSplit : assignmentSplit.getSplits()) {
                        SplitKey splitKey = new SplitKey(unwrappedSplit, catalog, schema, table);
                        splitKeyNodeAssignment.put(splitKey, node);
                    }
                }
                else {
                    SplitKey splitKey = new SplitKey(assignmentSplit, catalog, schema, table);
                    splitKeyNodeAssignment.put(splitKey, node);
                }
            }
        }
        perTableReuseTableScanMappingIdSplitKeyNodeAssignment.put(reuseTableScanMappingId, splitKeyNodeAssignment);
    }
    catch (NotImplementedException e) {
        log.error("Unsupported split type: " + e);
        throw new UnsupportedOperationException("Unsupported split type: " + e);
    }
}
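The method above inverts a node-to-splits multimap into a SplitKey-to-node lookup so a later consumer stage can route identical splits to the same workers. A small hedged sketch of the same inversion using plain collections, with strings standing in for InternalNode and SplitKey (names hypothetical):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InvertAssignment {
    /**
     * Inverts a node -> splits mapping into a split -> node lookup, the same
     * shape of transformation the method above performs with SplitKey objects.
     */
    static Map<String, String> invert(Map<String, List<String>> assignmentByNode) {
        Map<String, String> nodeBySplit = new HashMap<>();
        for (Map.Entry<String, List<String>> entry : assignmentByNode.entrySet()) {
            for (String splitKey : entry.getValue()) {
                nodeBySplit.put(splitKey, entry.getKey());
            }
        }
        return nodeBySplit;
    }

    public static void main(String[] args) {
        Map<String, List<String>> byNode = Map.of(
                "worker1", List.of("hive.tpch.orders:0", "hive.tpch.orders:1"),
                "worker2", List.of("hive.tpch.orders:2"));
        // split -> node lookup: each split key now maps to the worker that owned it
        System.out.println(invert(byNode));
    }
}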
use of io.prestosql.metadata.Split in project hetu-core by openlookeng.
the class SplitFiltering method getFilteredSplit.
public static List<Split> getFilteredSplit(Optional<RowExpression> expression, Optional<String> tableName, Map<Symbol, ColumnHandle> assignments, SplitSource.SplitBatch nextSplits, HeuristicIndexerManager heuristicIndexerManager) {
    if (!expression.isPresent() || !tableName.isPresent()) {
        return nextSplits.getSplits();
    }
    List<Split> allSplits = nextSplits.getSplits();
    String fullQualifiedTableName = tableName.get();
    long initialSplitsSize = allSplits.size();
    List<IndexRecord> indexRecords;
    try {
        indexRecords = heuristicIndexerManager.getIndexClient().getAllIndexRecords();
    }
    catch (IOException e) {
        LOG.debug("Filtering can't be done because the index records could not be read", e);
        return allSplits;
    }
    Set<String> referencedColumns = new HashSet<>();
    getAllColumns(expression.get(), referencedColumns, assignments);
    Map<String, IndexRecord> forwardIndexRecords = new HashMap<>();
    Map<String, IndexRecord> invertedIndexRecords = new HashMap<>();
    for (IndexRecord indexRecord : indexRecords) {
        if (indexRecord.qualifiedTable.equalsIgnoreCase(fullQualifiedTableName)) {
            List<String> columnsInIndex = Arrays.asList(indexRecord.columns);
            for (String column : referencedColumns) {
                if (columnsInIndex.contains(column)) {
                    String indexRecordKey = indexRecord.qualifiedTable + "/" + column + "/" + indexRecord.indexType;
                    if (INVERTED_INDEX.contains(indexRecord.indexType.toUpperCase())) {
                        forwardIndexRecords.put(indexRecordKey, indexRecord);
                    }
                    else {
                        invertedIndexRecords.put(indexRecordKey, indexRecord);
                    }
                }
            }
        }
    }
    List<Split> splitsToReturn;
    if (forwardIndexRecords.isEmpty() && invertedIndexRecords.isEmpty()) {
        return allSplits;
    }
    else if (!forwardIndexRecords.isEmpty() && invertedIndexRecords.isEmpty()) {
        splitsToReturn = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
    }
    else if (!invertedIndexRecords.isEmpty() && forwardIndexRecords.isEmpty()) {
        splitsToReturn = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
    }
    else {
        // filter using both indexes and return the smaller of the two resulting split sets
        List<Split> splitsToReturn1 = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
        List<Split> splitsToReturn2 = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
        splitsToReturn = splitsToReturn1.size() < splitsToReturn2.size() ? splitsToReturn1 : splitsToReturn2;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("totalSplitsProcessed: " + totalSplitsProcessed.addAndGet(initialSplitsSize));
        LOG.debug("splitsFiltered: " + splitsFiltered.addAndGet(initialSplitsSize - splitsToReturn.size()));
    }
    return splitsToReturn;
}
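getFilteredSplit only considers index records that cover both the queried table and a column referenced by the predicate, and it buckets the matching records by index type to decide which filter path to run. The sketch below shows that bucketing step in isolation; the index-type set and every other name here are hypothetical stand-ins, not the constants or records used by SplitFiltering.

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;

public class IndexRecordBucketing {
    // index types treated as inverted indexes in this sketch (hypothetical; the real constant lives in SplitFiltering)
    static final Set<String> INVERTED_INDEX_TYPES = Set.of("BITMAP");

    /**
     * Buckets "table/column/type" keys by whether the index type counts as an
     * inverted index, mirroring how the method above chooses a filter path.
     * true -> inverted-index candidates, false -> forward-index candidates.
     */
    static Map<Boolean, List<String>> bucketByType(String table, Set<String> referencedColumns,
            Map<String, String> indexTypeByColumn) {
        Map<Boolean, List<String>> buckets = new HashMap<>();
        for (String column : referencedColumns) {
            String indexType = indexTypeByColumn.get(column);
            if (indexType == null) {
                continue;   // no index on this column
            }
            String key = table + "/" + column + "/" + indexType;
            boolean inverted = INVERTED_INDEX_TYPES.contains(indexType.toUpperCase(Locale.ROOT));
            buckets.computeIfAbsent(inverted, b -> new ArrayList<>()).add(key);
        }
        return buckets;
    }

    public static void main(String[] args) {
        Map<String, String> indexTypeByColumn = Map.of("orderkey", "BITMAP", "orderstatus", "BLOOM");
        System.out.println(bucketByType("hive.tpch.orders", Set.of("orderkey", "orderstatus", "comment"), indexTypeByColumn));
    }
}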
use of io.prestosql.metadata.Split in project hetu-core by openlookeng.
the class SplitFiltering method filterUsingInvertedIndex.
private static List<Split> filterUsingInvertedIndex(RowExpression expression, List<Split> inputSplits, String fullQualifiedTableName, Set<String> referencedColumns, Map<String, IndexRecord> indexRecordKeyToRecordMap, HeuristicIndexerManager indexerManager) {
    try {
        Map<String, Long> inputMaxLastUpdated = new HashMap<>();
        Map<String, Long> indexMaxLastUpdated = new HashMap<>();
        Map<String, List<Split>> partitionSplitMap = new HashMap<>();
        for (Split split : inputSplits) {
            String filePathStr = split.getConnectorSplit().getFilePath();
            String indexKey = getPartitionKeyOrElse(filePathStr, TABLE_LEVEL_KEY);
            long lastUpdated = split.getConnectorSplit().getLastModifiedTime();
            if (!inputMaxLastUpdated.containsKey(indexKey) || lastUpdated > inputMaxLastUpdated.get(indexKey)) {
                inputMaxLastUpdated.put(indexKey, lastUpdated);
            }
            if (!partitionSplitMap.containsKey(indexKey)) {
                partitionSplitMap.put(indexKey, new ArrayList<>());
            }
            partitionSplitMap.get(indexKey).add(split);
        }
        // The splits are not compliant with the table structure. Return all the splits
        if (partitionSplitMap.isEmpty()) {
            return inputSplits;
        }
        // col -> list of all indices on this column (all partitions)
        Map<String, List<IndexMetadata>> allIndices = new HashMap<>();
        // index loading and verification
        for (String column : referencedColumns) {
            List<IndexMetadata> indexMetadataList = new ArrayList<>();
            for (String indexType : INVERTED_INDEX) {
                indexMetadataList.addAll(getCache(indexerManager.getIndexClient()).getIndices(fullQualifiedTableName, column, indexType, partitionSplitMap.keySet(), Collections.max(inputMaxLastUpdated.values()), indexRecordKeyToRecordMap));
            }
            // record the last modified time of each index; splits containing data modified after the index was created are kept without filtering later on
            for (IndexMetadata index : indexMetadataList) {
                String partitionKey = getPartitionKeyOrElse(index.getUri(), TABLE_LEVEL_KEY);
                long lastModifiedTime = Long.parseLong(index.getIndex().getProperties().getProperty(MAX_MODIFIED_TIME));
                indexMaxLastUpdated.put(partitionKey, lastModifiedTime);
            }
            allIndices.put(column, indexMetadataList);
        }
        // look up the index
        IndexFilter filter = indexerManager.getIndexFilter(allIndices);
        Iterator<String> iterator = filter.lookUp(expression);
        if (iterator == null) {
            throw new IndexLookUpException();
        }
        // all positions looked up from the index, organized by file path
        Map<String, List<Pair<Long, Long>>> lookUpResults = new HashMap<>();
        while (iterator.hasNext()) {
            SerializationUtils.LookUpResult parsedLookUpResult = deserializeStripeSymbol(iterator.next());
            if (!lookUpResults.containsKey(parsedLookUpResult.filepath)) {
                lookUpResults.put(parsedLookUpResult.filepath, new ArrayList<>());
            }
            lookUpResults.get(parsedLookUpResult.filepath).add(parsedLookUpResult.stripe);
        }
        // filtering
        List<Split> filteredSplits = new ArrayList<>();
        for (Map.Entry<String, List<Split>> entry : partitionSplitMap.entrySet()) {
            String partitionKey = entry.getKey();
            // the partition has its own partition-level index
            boolean partitionHasOwnIndex = indexMaxLastUpdated.containsKey(partitionKey);
            // the partition is covered by a table-level index
            boolean partitionHasTableLevelIndex = indexMaxLastUpdated.size() == 1 && indexMaxLastUpdated.containsKey(TABLE_LEVEL_KEY);
            if (!partitionHasOwnIndex && !partitionHasTableLevelIndex) {
                filteredSplits.addAll(entry.getValue());
            }
            else {
                long indexLastModifiedTimeOfThisPartition;
                if (partitionHasOwnIndex) {
                    indexLastModifiedTimeOfThisPartition = indexMaxLastUpdated.get(partitionKey);
                }
                else {
                    indexLastModifiedTimeOfThisPartition = indexMaxLastUpdated.get(TABLE_LEVEL_KEY);
                }
                for (Split split : entry.getValue()) {
                    String filePathStr = new URI(split.getConnectorSplit().getFilePath()).getPath();
                    if (split.getConnectorSplit().getLastModifiedTime() > indexLastModifiedTimeOfThisPartition) {
                        filteredSplits.add(split);
                    }
                    else if (lookUpResults.containsKey(filePathStr)) {
                        Pair<Long, Long> targetRange = new Pair<>(split.getConnectorSplit().getStartIndex(), split.getConnectorSplit().getEndIndex());
                        // do stripe matching: check if [targetStart, targetEnd] overlaps with any of the matching stripes
                        // first sort the matching stripes, e.g. (5,10), (18,25), (30,35), (35,40)
                        // then do a binary search for both the start and end of the target
                        List<Pair<Long, Long>> stripes = lookUpResults.get(filePathStr);
                        stripes.sort(Comparator.comparingLong(Pair::getFirst));
                        if (rangeSearch(stripes, targetRange)) {
                            filteredSplits.add(split);
                        }
                    }
                }
            }
        }
        return filteredSplits;
    }
    catch (Throwable e) {
        LOG.debug("Exception occurred while filtering. Returning the original splits", e);
        return inputSplits;
    }
}
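The final stripe check relies on rangeSearch, which is not part of this snippet. Below is one way such an overlap test could be written over the sorted stripe list, assuming the stripes within a file are disjoint ranges; it is a hedged sketch, not the hetu-core implementation, and the Range type is a stand-in for Pair<Long, Long>.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class StripeOverlap {
    // minimal stand-in for the Pair<Long, Long> stripe ranges used above
    static final class Range {
        final long first;
        final long second;
        Range(long first, long second) {
            this.first = first;
            this.second = second;
        }
    }

    /**
     * Returns true if [target.first, target.second] overlaps any stripe in the
     * sorted, disjoint list. A binary search finds the first stripe starting
     * after the target's end; the stripe just before it is the only candidate.
     */
    static boolean rangeSearch(List<Range> sortedStripes, Range target) {
        int lo = 0;
        int hi = sortedStripes.size();
        while (lo < hi) {   // lower bound: first stripe with start > target end
            int mid = (lo + hi) >>> 1;
            if (sortedStripes.get(mid).first <= target.second) {
                lo = mid + 1;
            }
            else {
                hi = mid;
            }
        }
        // the candidate starts at or before the target's end; they overlap iff it also ends at or after the target's start
        return lo > 0 && sortedStripes.get(lo - 1).second >= target.first;
    }

    public static void main(String[] args) {
        List<Range> stripes = new ArrayList<>(Arrays.asList(new Range(18, 25), new Range(5, 10), new Range(30, 35)));
        stripes.sort(Comparator.comparingLong(r -> r.first));
        System.out.println(rangeSearch(stripes, new Range(11, 17)));   // false: falls in the gap between stripes
        System.out.println(rangeSearch(stripes, new Range(24, 28)));   // true: overlaps (18,25)
    }
}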