Use of com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf in project pinot by LinkedIn.
The class StarTreeJsonNode, method build.
private int build(StarTreeIndexNodeInterf indexNode, StarTreeJsonNode json) {
  Iterator<? extends StarTreeIndexNodeInterf> childrenIterator = indexNode.getChildrenIterator();
  if (!childrenIterator.hasNext()) {
    return 0;
  }
  int childDimensionId = indexNode.getChildDimensionName();
  String childDimensionName = dimensionNameToIndexMap.inverse().get(childDimensionId);
  Dictionary dictionary = dictionaries.get(childDimensionName);
  int totalChildNodes = indexNode.getNumChildren();

  Comparator<Pair<String, Integer>> comparator = new Comparator<Pair<String, Integer>>() {
    @Override
    public int compare(Pair<String, Integer> o1, Pair<String, Integer> o2) {
      return -1 * Integer.compare(o1.getRight(), o2.getRight());
    }
  };
  MinMaxPriorityQueue<Pair<String, Integer>> queue =
      MinMaxPriorityQueue.orderedBy(comparator).maximumSize(MAX_CHILDREN).create();

  StarTreeJsonNode allNode = null;
  while (childrenIterator.hasNext()) {
    StarTreeIndexNodeInterf childIndexNode = childrenIterator.next();
    int childDimensionValueId = childIndexNode.getDimensionValue();
    String childDimensionValue = "ALL";
    if (childDimensionValueId != StarTreeIndexNodeInterf.ALL) {
      childDimensionValue = dictionary.get(childDimensionValueId).toString();
    }
    StarTreeJsonNode childJson = new StarTreeJsonNode(childDimensionValue);
    totalChildNodes += build(childIndexNode, childJson);
    if (childDimensionValueId != StarTreeIndexNodeInterf.ALL) {
      json.addChild(childJson);
      queue.add(ImmutablePair.of(childDimensionValue, totalChildNodes));
    } else {
      allNode = childJson;
    }
  }
  // Put the ALL node at the end
  if (allNode != null) {
    json.addChild(allNode);
  }
  if (totalChildNodes > MAX_CHILDREN) {
    Iterator<Pair<String, Integer>> qIterator = queue.iterator();
    Set<String> topKDimensions = new HashSet<>();
    topKDimensions.add("ALL");
    while (qIterator.hasNext()) {
      topKDimensions.add(qIterator.next().getKey());
    }
    Iterator<StarTreeJsonNode> iterator = json.getChildren().iterator();
    while (iterator.hasNext()) {
      StarTreeJsonNode next = iterator.next();
      if (!topKDimensions.contains(next.getName())) {
        iterator.remove();
      }
    }
  }
  return totalChildNodes;
}
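The pruning above keeps only the MAX_CHILDREN largest children via Guava's MinMaxPriorityQueue, which evicts its greatest element under the supplied ordering once the maximum size is exceeded; with a descending-count comparator, that means the smallest counts are dropped. A minimal, self-contained sketch of this pattern follows, with toy dimension values, MAX_CHILDREN assumed to be 2, and imports assuming Guava plus the commons-lang3 Pair types used above:

import com.google.common.collect.MinMaxPriorityQueue;
import java.util.Comparator;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

public class TopKChildrenDemo {
  public static void main(String[] args) {
    // Order pairs by descending count, mirroring the comparator in build().
    Comparator<Pair<String, Integer>> byCountDesc =
        (o1, o2) -> -1 * Integer.compare(o1.getRight(), o2.getRight());

    // With maximumSize(2), the queue evicts its greatest element under this ordering,
    // i.e. the pair with the smallest count, so only the two largest counts survive.
    MinMaxPriorityQueue<Pair<String, Integer>> queue =
        MinMaxPriorityQueue.orderedBy(byCountDesc).maximumSize(2).create();

    queue.add(ImmutablePair.of("chrome", 120));
    queue.add(ImmutablePair.of("safari", 45));
    queue.add(ImmutablePair.of("firefox", 80));

    // Prints "chrome" and "firefox" (iteration order within the queue is unspecified).
    queue.forEach(p -> System.out.println(p.getKey() + " -> " + p.getValue()));
  }
}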
Use of com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf in project pinot by LinkedIn.
The class StarTreeIndexOperator, method addMatchingChildrenToQueue.
/**
 * Helper method to add matching children into the search queue.
 * - If the predicate can be applied (i.e. an eligible equality predicate), add the children
 *   satisfying the predicate to the queue.
 * - If the predicate cannot be applied (either ineligible or a non-equality predicate), add all
 *   children to the queue.
 * - If there is no predicate on the column, add the star child to the queue.
 *
 * @param searchQueue queue of entries still to be explored
 * @param node current star-tree node whose children are being considered
 * @param column dimension column that this node's children split on
 * @param remainingPredicateColumns predicate columns not yet resolved along this path
 * @param remainingGroupByColumns group-by columns not yet resolved along this path
 */
private void addMatchingChildrenToQueue(Queue<SearchEntry> searchQueue, StarTreeIndexNodeInterf node,
    String column, HashSet<String> remainingPredicateColumns, HashSet<String> remainingGroupByColumns) {
  if (predicateColumns.contains(column)) {
    // Check if there is an exact match filter on this column
    PredicateEntry predicateEntry = predicatesMap.get(column);
    remainingPredicateColumns.remove(column);
    remainingGroupByColumns.remove(column);
    int[] matchingDictionaryIds = predicateEntry.predicateEvaluator.getMatchingDictionaryIds();
    for (int matchingDictionaryId : matchingDictionaryIds) {
      StarTreeIndexNodeInterf child = node.getChildForDimensionValue(matchingDictionaryId);
      if (child != null) {
        addNodeToSearchQueue(searchQueue, child, remainingPredicateColumns, remainingGroupByColumns);
      }
    }
  } else {
    int nextValueId;
    if (groupByColumns.contains(column) || predicatesMap.containsKey(column)
        || (node.getChildForDimensionValue(StarTreeIndexNodeInterf.ALL) == null)) {
      Iterator<? extends StarTreeIndexNodeInterf> childrenIterator = node.getChildrenIterator();
      while (childrenIterator.hasNext()) {
        StarTreeIndexNodeInterf child = childrenIterator.next();
        if (child.getDimensionValue() != StarTreeIndexNodeInterf.ALL) {
          remainingPredicateColumns.remove(column);
          remainingGroupByColumns.remove(column);
          addNodeToSearchQueue(searchQueue, child, remainingPredicateColumns, remainingGroupByColumns);
        }
      }
    } else {
      // Since we have a star node and no group-by on this column, we can lose this dimension
      // by taking the star node path
      nextValueId = StarTreeIndexNodeInterf.ALL;
      addNodeToSearchQueue(searchQueue, node.getChildForDimensionValue(nextValueId),
          remainingPredicateColumns, remainingGroupByColumns);
    }
  }
}
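Neither SearchEntry nor addNodeToSearchQueue appears in this excerpt. Below is a hypothetical reconstruction of both, inferred only from their call sites above (field and parameter names are taken from the excerpt; the actual Pinot implementation may differ):

// Hypothetical reconstruction, inferred from the call sites above; not copied from Pinot.
private static class SearchEntry {
  StarTreeIndexNodeInterf starTreeIndexnode;
  HashSet<String> remainingPredicateColumns;
  HashSet<String> remainingGroupByColumns;
}

private void addNodeToSearchQueue(Queue<SearchEntry> searchQueue, StarTreeIndexNodeInterf node,
    HashSet<String> remainingPredicateColumns, HashSet<String> remainingGroupByColumns) {
  SearchEntry entry = new SearchEntry();
  entry.starTreeIndexnode = node;
  // Copy the sets so that removals made while exploring one child do not leak into siblings.
  entry.remainingPredicateColumns = new HashSet<>(remainingPredicateColumns);
  entry.remainingGroupByColumns = new HashSet<>(remainingGroupByColumns);
  searchQueue.add(entry);
}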
Use of com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf in project pinot by LinkedIn.
The class StarTreeIndexOperator, method findMatchingLeafNodes.
private Queue<SearchEntry> findMatchingLeafNodes() {
  Queue<SearchEntry> matchedEntries = new LinkedList<>();
  Queue<SearchEntry> searchQueue = new LinkedList<>();
  HashBiMap<String, Integer> dimensionIndexToNameMapping = segment.getStarTree().getDimensionNameToIndexMap();
  SearchEntry startEntry = new SearchEntry();
  startEntry.starTreeIndexnode = segment.getStarTree().getRoot();
  startEntry.remainingPredicateColumns = new HashSet<>(predicatesMap.keySet());
  startEntry.remainingGroupByColumns = new HashSet<>(groupByColumns);
  searchQueue.add(startEntry);
  while (!searchQueue.isEmpty()) {
    SearchEntry searchEntry = searchQueue.remove();
    StarTreeIndexNodeInterf current = searchEntry.starTreeIndexnode;
    HashSet<String> remainingPredicateColumns = searchEntry.remainingPredicateColumns;
    HashSet<String> remainingGroupByColumns = searchEntry.remainingGroupByColumns;
    // Check if it is a leaf, or if there are no remaining predicate/group-by columns and the node
    // has a valid aggregated docId
    if (current.isLeaf() || (remainingPredicateColumns.isEmpty() && remainingGroupByColumns.isEmpty())
        && isValidAggregatedDocId(current.getAggregatedDocumentId())) {
      // Reached a matching node
      matchedEntries.add(searchEntry);
      continue;
    }
    // Find the next set of nodes to search
    String nextDimension = dimensionIndexToNameMapping.inverse().get(current.getChildDimensionName());
    HashSet<String> newRemainingPredicateColumns = new HashSet<>();
    newRemainingPredicateColumns.addAll(remainingPredicateColumns);
    HashSet<String> newRemainingGroupByColumns = new HashSet<>();
    newRemainingGroupByColumns.addAll(remainingGroupByColumns);
    addMatchingChildrenToQueue(searchQueue, current, nextDimension, newRemainingPredicateColumns,
        newRemainingGroupByColumns);
  }
  return matchedEntries;
}
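The nextDimension lookup above turns a child dimension index back into a column name through Guava's HashBiMap, whose inverse() view swaps keys and values. A small, self-contained illustration with toy dimension names (not Pinot's actual mapping):

import com.google.common.collect.HashBiMap;

public class DimensionBiMapDemo {
  public static void main(String[] args) {
    // Toy name-to-index mapping, analogous to getDimensionNameToIndexMap().
    HashBiMap<String, Integer> dimensionNameToIndexMap = HashBiMap.create();
    dimensionNameToIndexMap.put("country", 0);
    dimensionNameToIndexMap.put("browser", 1);

    // inverse() is the index-to-name view used to resolve a node's child dimension index.
    String nextDimension = dimensionNameToIndexMap.inverse().get(1);
    System.out.println(nextDimension); // prints "browser"
  }
}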
Use of com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf in project pinot by LinkedIn.
The class StarTreeIndexOperator, method buildMatchingLeafOperators.
/**
 * Helper method to build a list of operators for matching leaf nodes.
 * - Finds all leaf nodes that match the predicates.
 * - Iterates over all the matching leaf nodes and generates a list of matching ranges.
 *
 * @return list of filter operators covering the matched leaf nodes
 */
private List<BaseFilterOperator> buildMatchingLeafOperators() {
  int totalDocsToScan = 0;
  int numExactlyMatched = 0;
  long start = System.currentTimeMillis();
  final MutableRoaringBitmap exactlyMatchedDocsBitmap = new MutableRoaringBitmap();
  Queue<SearchEntry> matchedEntries = findMatchingLeafNodes();
  // Iterate over the matching nodes. For each column, generate the list of ranges.
  List<BaseFilterOperator> matchingLeafOperators = new ArrayList<>();
  for (SearchEntry matchedEntry : matchedEntries) {
    BaseFilterOperator matchingLeafOperator = null;
    StarTreeIndexNodeInterf matchedLeafNode = matchedEntry.starTreeIndexnode;
    int startDocId = matchedLeafNode.getStartDocumentId();
    int endDocId = matchedLeafNode.getEndDocumentId();
    if (matchedEntry.remainingPredicateColumns.isEmpty()) {
      // No more filters to apply.
      // Use the aggregated doc for this leaf node if possible.
      int aggregatedDocumentId = matchedLeafNode.getAggregatedDocumentId();
      if (isValidAggregatedDocId(aggregatedDocumentId) && matchedEntry.remainingGroupByColumns.isEmpty()) {
        exactlyMatchedDocsBitmap.add(aggregatedDocumentId);
        numExactlyMatched = numExactlyMatched + 1;
      } else {
        // Have to scan all the documents under this leaf node.
        exactlyMatchedDocsBitmap.add(startDocId, endDocId);
        numExactlyMatched += (endDocId - startDocId);
      }
    } else {
      Map<String, PredicateEntry> remainingPredicatesMap = computeRemainingPredicates(matchedEntry);
      List<BaseFilterOperator> filterOperators =
          createFilterOperatorsForRemainingPredicates(matchedEntry, remainingPredicatesMap);
      if (filterOperators.size() == 0) {
        // The predicates are applied, but we cannot use the aggregated doc, as we might have lost
        // the group-by dimensions in the aggregated doc.
        exactlyMatchedDocsBitmap.add(startDocId, endDocId);
        numExactlyMatched += (endDocId - startDocId);
      } else if (filterOperators.size() == 1) {
        matchingLeafOperator = filterOperators.get(0);
      } else {
        matchingLeafOperator = new AndOperator(filterOperators);
      }
      if (matchingLeafOperator != null) {
        matchingLeafOperators.add(matchingLeafOperator);
      }
    }
    totalDocsToScan += (endDocId - startDocId);
    LOGGER.debug("{}", matchedLeafNode);
  }
  // Add an operator for exactlyMatchedDocs
  if (numExactlyMatched > 0) {
    matchingLeafOperators.add(createFilterOperator(exactlyMatchedDocsBitmap));
    totalDocsToScan += numExactlyMatched;
  }
  long end = System.currentTimeMillis();
  LOGGER.debug("Found {} matching leaves, took {} ms to create remaining filter operators. Total docs to scan: {}",
      matchedEntries.size(), (end - start), totalDocsToScan);
  return matchingLeafOperators;
}
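The exactly-matched documents are accumulated in a RoaringBitmap before a single filter operator is created for them. A small, self-contained illustration of the two add forms used above, a single aggregated doc id and a half-open [startDocId, endDocId) range, with toy ids:

import org.roaringbitmap.buffer.MutableRoaringBitmap;

public class ExactMatchBitmapDemo {
  public static void main(String[] args) {
    MutableRoaringBitmap exactlyMatchedDocsBitmap = new MutableRoaringBitmap();

    // Single aggregated document id, as in the "use aggregated doc" branch.
    exactlyMatchedDocsBitmap.add(42);

    // All documents under a leaf node; the range add is half-open: [startDocId, endDocId).
    exactlyMatchedDocsBitmap.add(100L, 105L);

    // 1 + 5 = 6 document ids in total.
    System.out.println(exactlyMatchedDocsBitmap.getCardinality());
  }
}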