Search in sources :

Example 1 with SortedInvertedIndexReader

Use of com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader in the project pinot by linkedin.

The method nextFilterBlock of the class SortedInvertedIndexBasedFilterOperator.

/**
 * Builds the filter block for this operator as a list of matching docId ranges.
 *
 * <p>There are two kinds of predicates:
 * <ul>
 *   <li>"Additive" ones (EQ, IN, RANGE): the ranges of the matching dictionary ids are clipped to
 *       [startDocId; endDocId] and overlapping/adjacent ranges are merged.</li>
 *   <li>"Subtractive" ones (NEQ, NOT_IN): the ranges of the non-matching dictionary ids are collected the same
 *       way and treated as "holes" that are subtracted from [startDocId; endDocId].</li>
 * </ul>
 *
 * <p>For now, the cardinality of the column's dictionary is not taken into account, although it should be if
 * someone specifies a very large list of IN/NOT IN predicates or a very large/small range predicate relative to
 * the cardinality. As adjacent ranges get merged before the final list is returned, the only drawback is the
 * amount of memory used during the filter block evaluation.
 *
 * @param BlockId not read by this method
 * @return the SortedBlock built from the resulting ranges; the same instance is stored in {@code sortedBlock}
 * @throws RuntimeException if the predicate type is REGEX or any other type not listed above
 */
@Override
public BaseFilterBlock nextFilterBlock(BlockId BlockId) {
    final SortedInvertedIndexReader invertedIndex = (SortedInvertedIndexReader) dataSource.getInvertedIndex();

    final int[] dictionaryIds;
    boolean additiveRanges = true;
    switch(predicate.getType()) {
        case EQ:
        case IN:
        case RANGE:
            dictionaryIds = predicateEvaluator.getMatchingDictionaryIds();
            break;
        case NEQ:
        case NOT_IN:
            additiveRanges = false;
            dictionaryIds = predicateEvaluator.getNonMatchingDictionaryIds();
            break;
        case REGEX:
            throw new RuntimeException("Regex is not supported");
        default:
            throw new RuntimeException("Unimplemented!");
    }

    List<IntPair> pairs = collectClippedRanges(invertedIndex, dictionaryIds);
    if (!additiveRanges) {
        // The collected ranges are the docIds that do NOT match; turn them into the ranges that do
        pairs = invertHoles(pairs);
    }

    LOGGER.debug("Creating a Sorted Block with pairs: {}", pairs);
    sortedBlock = new SortedBlock(dataSource.getOperatorName(), pairs);
    return sortedBlock;
}

/**
 * Collects the docId ranges of the given dictionary ids, clipped to [startDocId; endDocId]. Ranges that
 * {@code IntRanges} reports as mergeable are merged and invalid (degenerate) ranges are dropped.
 *
 * @param invertedIndex sorted inverted index used to look up the docId range of each dictionary id
 * @param dictionaryIds dictionary ids to look up; sorted in place by this method
 * @return the resulting ranges, in the order they were produced; empty if {@code dictionaryIds} is empty
 */
private List<IntPair> collectClippedRanges(SortedInvertedIndexReader invertedIndex, int[] dictionaryIds) {
    List<IntPair> ranges = new ArrayList<>();
    if (dictionaryIds.length == 0) {
        return ranges;
    }

    // Sort the dictionaryIds in ascending order, so that their respective ranges are adjacent if their
    // dictionaryIds are adjacent.
    // NOTE(review): this sorts the array handed out by predicateEvaluator in place - confirm that the evaluator
    // does not depend on the original order of that array.
    Arrays.sort(dictionaryIds);

    IntPair lastPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[0]);
    IntRanges.clip(lastPair, startDocId, endDocId);
    for (int i = 1; i < dictionaryIds.length; i++) {
        IntPair currentPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[i]);
        IntRanges.clip(currentPair, startDocId, endDocId);

        if (IntRanges.isInvalid(lastPair)) {
            // The pending range is degenerate: drop it and keep the current one as pending
            lastPair = currentPair;
        } else if (IntRanges.rangesAreMergeable(lastPair, currentPair)) {
            // Grow the pending range instead of emitting two ranges
            IntRanges.mergeIntoFirst(lastPair, currentPair);
        } else {
            // The pending range is known to be valid here, so it can be emitted without a second check
            ranges.add(lastPair);
            lastPair = currentPair;
        }
    }

    // Emit the last pending range if it's valid
    if (!IntRanges.isInvalid(lastPair)) {
        ranges.add(lastPair);
    }
    return ranges;
}

/**
 * Inverts a list of "holes" within [startDocId; endDocId] into the list of ranges between them.
 *
 * <p>With no holes the result is the single range [startDocId; endDocId]. Otherwise the result is made of
 * [startDocId; firstHoleStart - 1], the ranges between consecutive holes and [lastHoleEnd + 1; endDocId],
 * keeping only the ranges that are valid.
 *
 * @param holes ranges to subtract, in ascending order
 * @return the ranges of [startDocId; endDocId] that are not covered by any hole
 */
private List<IntPair> invertHoles(List<IntPair> holes) {
    List<IntPair> filled = new ArrayList<>();
    if (holes.isEmpty()) {
        // Kept as a separate case: the full range is added as is, without a validity check
        filled.add(new IntPair(startDocId, endDocId));
        return filled;
    }

    // Walk the holes once; nextStart is the first docId after the previously visited hole
    int nextStart = startDocId;
    for (IntPair hole : holes) {
        IntPair gap = new IntPair(nextStart, hole.getLeft() - 1);
        if (!IntRanges.isInvalid(gap)) {
            filled.add(gap);
        }
        nextStart = hole.getRight() + 1;
    }

    // Add the last filled area (between the last hole and endDocId)
    IntPair lastRange = new IntPair(nextStart, endDocId);
    if (!IntRanges.isInvalid(lastRange)) {
        filled.add(lastRange);
    }
    return filled;
}
Also used : SortedInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader) ArrayList(java.util.ArrayList) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair)

Aggregations

IntPair (com.linkedin.pinot.common.utils.Pairs.IntPair): 1, SortedInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader): 1, ArrayList (java.util.ArrayList): 1