Use of com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader in project pinot by linkedin.
The method nextFilterBlock of the class SortedInvertedIndexBasedFilterOperator.
/**
 * Builds the filter block for this operator: a {@link SortedBlock} holding the list of docId ranges that
 * satisfy the predicate, restricted to [startDocId; endDocId].
 *
 * <p>There are two kinds of operators:
 * <ul>
 *   <li>"Additive" operators (EQ, IN and RANGE) build up a list of matching ranges, merging
 *       overlapping/adjacent ones and clipping them to [startDocId; endDocId].</li>
 *   <li>"Subtractive" operators (NEQ and NOT IN) build up the list of ranges that do NOT match and
 *       subtract those "holes" from [startDocId; endDocId].</li>
 * </ul>
 *
 * <p>For now, the cardinality of the column's dictionary is not taken into account, although it should be if
 * someone specifies a very large list of IN/NOT IN predicates relative to the column's cardinality or a very
 * large/small range predicate relative to the cardinality. However, as adjacent ranges get merged before the
 * final list of ranges is returned, the only drawback is that a lot of memory is used during the filter block
 * evaluation.
 *
 * <p>As a side effect, the created block is also stored in the {@code sortedBlock} field.
 *
 * @param BlockId not read by this method
 * @return the sorted block describing the matching docId ranges
 * @throws RuntimeException if the predicate type is REGEX or any other type not handled here
 */
@Override
public BaseFilterBlock nextFilterBlock(BlockId BlockId) {
  final SortedInvertedIndexReader invertedIndex = (SortedInvertedIndexReader) dataSource.getInvertedIndex();

  final int[] dictionaryIds;
  final boolean additiveRanges;
  switch (predicate.getType()) {
    case EQ:
    case IN:
    case RANGE:
      additiveRanges = true;
      dictionaryIds = predicateEvaluator.getMatchingDictionaryIds();
      break;
    case NEQ:
    case NOT_IN:
      additiveRanges = false;
      dictionaryIds = predicateEvaluator.getNonMatchingDictionaryIds();
      break;
    case REGEX:
      throw new RuntimeException("Regex is not supported");
    default:
      throw new RuntimeException("Unimplemented!");
  }

  List<IntPair> pairs = collectMergedDocIdRanges(invertedIndex, dictionaryIds);
  if (!additiveRanges) {
    // For subtractive operators the collected ranges are the non-matching "holes"; invert them.
    pairs = invertHoles(pairs);
  }

  LOGGER.debug("Creating a Sorted Block with pairs: {}", pairs);
  sortedBlock = new SortedBlock(dataSource.getOperatorName(), pairs);
  return sortedBlock;
}

/**
 * Looks up the docId range of every given dictionary id, clips each range to [startDocId; endDocId] and
 * merges the ranges that {@code IntRanges.rangesAreMergeable} reports as mergeable.
 *
 * <p>NOTE: {@code dictionaryIds} is sorted in place, so the array handed out by the predicate evaluator is
 * reordered by this call.
 *
 * @param invertedIndex the sorted inverted index used to resolve a dictionary id to its docId range
 * @param dictionaryIds the dictionary ids to resolve; may be empty
 * @return the valid, merged ranges in ascending dictionary id order; empty if there are none
 */
private List<IntPair> collectMergedDocIdRanges(SortedInvertedIndexReader invertedIndex, int[] dictionaryIds) {
  List<IntPair> pairs = new ArrayList<>();
  if (dictionaryIds.length == 0) {
    return pairs;
  }

  // Sort the dictionaryIds in ascending order, so that their respective ranges are adjacent if their
  // dictionaryIds are adjacent
  Arrays.sort(dictionaryIds);

  IntPair lastPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[0]);
  IntRanges.clip(lastPair, startDocId, endDocId);

  for (int i = 1; i < dictionaryIds.length; i++) {
    IntPair currentPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[i]);
    IntRanges.clip(currentPair, startDocId, endDocId);

    if (IntRanges.isInvalid(lastPair)) {
      // The previous range is degenerate: drop it and just keep the current one
      lastPair = currentPair;
    } else if (IntRanges.rangesAreMergeable(lastPair, currentPair)) {
      IntRanges.mergeIntoFirst(lastPair, currentPair);
    } else {
      // The previous range is known to be valid here (checked above), so add it and keep the current one
      // to be added later
      pairs.add(lastPair);
      lastPair = currentPair;
    }
  }

  // Add the last range if it's valid
  if (!IntRanges.isInvalid(lastPair)) {
    pairs.add(lastPair);
  }
  return pairs;
}

/**
 * Inverts a list of "holes" in the [startDocId; endDocId] range into the list of filled areas around them.
 * There are two cases:
 * <ul>
 *   <li>No holes, in which case the only range is [startDocId; endDocId].</li>
 *   <li>One or more holes, in which case the ranges are [startDocId; firstHoleStartDocId - 1], the areas
 *       in between consecutive holes, and [lastHoleEndDocId + 1; endDocId]; any of these that is invalid
 *       according to {@code IntRanges.isInvalid} is skipped.</li>
 * </ul>
 *
 * @param holes the non-matching ranges, in the order produced by {@code collectMergedDocIdRanges}
 * @return the ranges of [startDocId; endDocId] that are not covered by a hole
 */
private List<IntPair> invertHoles(List<IntPair> holes) {
  List<IntPair> filledRanges = new ArrayList<>();
  if (holes.isEmpty()) {
    filledRanges.add(new IntPair(startDocId, endDocId));
    return filledRanges;
  }

  // Walk the holes in order; each filled area starts right after the previous hole (or at startDocId for
  // the first one) and ends right before the current hole.
  int filledStart = startDocId;
  for (IntPair hole : holes) {
    IntPair range = new IntPair(filledStart, hole.getLeft() - 1);
    if (!IntRanges.isInvalid(range)) {
      filledRanges.add(range);
    }
    filledStart = hole.getRight() + 1;
  }

  // Add the last filled area (between the last hole and endDocId)
  IntPair lastRange = new IntPair(filledStart, endDocId);
  if (!IntRanges.isInvalid(lastRange)) {
    filledRanges.add(lastRange);
  }
  return filledRanges;
}
Aggregations