Search in sources :

Example 6 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedDocIdSetTest method testTwoPair.

/**
 * Checks that a doc id set built from the two adjacent ranges [90, 99] and [100, 109] yields 20 doc ids
 * when iterated to the end.
 */
@Test
public void testTwoPair() {
    List<IntPair> pairs = new ArrayList<IntPair>();
    pairs.add(Pairs.intPair(90, 99));
    pairs.add(Pairs.intPair(100, 109));
    SortedDocIdSet sortedDocIdSet = new SortedDocIdSet("Datasource-testCol", pairs);
    BlockDocIdIterator iterator = sortedDocIdSet.iterator();
    List<Integer> result = new ArrayList<Integer>();
    int docId;
    // Drain the iterator until it reports the end-of-stream marker
    while ((docId = iterator.next()) != Constants.EOF) {
        result.add(docId);
    }
    // TestNG convention: assertEquals(actual, expected), so failure messages label the values correctly
    Assert.assertEquals(result.size(), 20);
}
Also used : ArrayList(java.util.ArrayList) SortedDocIdSet(com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) BlockDocIdIterator(com.linkedin.pinot.core.common.BlockDocIdIterator) Test(org.testng.annotations.Test)

Example 7 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedDocIdSetTest method testPairWithSameStartAndEnd.

/**
 * Checks that a doc id set built from the single-element range [1, 1] yields exactly one doc id
 * when iterated to the end.
 */
@Test
public void testPairWithSameStartAndEnd() {
    List<IntPair> pairs = new ArrayList<IntPair>();
    pairs.add(Pairs.intPair(1, 1));
    SortedDocIdSet sortedDocIdSet = new SortedDocIdSet("Datasource-testCol", pairs);
    BlockDocIdIterator iterator = sortedDocIdSet.iterator();
    List<Integer> result = new ArrayList<Integer>();
    int docId;
    // Drain the iterator until it reports the end-of-stream marker
    while ((docId = iterator.next()) != Constants.EOF) {
        result.add(docId);
    }
    // TestNG convention: assertEquals(actual, expected), so failure messages label the values correctly
    Assert.assertEquals(result.size(), 1);
}
Also used : ArrayList(java.util.ArrayList) SortedDocIdSet(com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) BlockDocIdIterator(com.linkedin.pinot.core.common.BlockDocIdIterator) Test(org.testng.annotations.Test)

Example 8 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedDocIdSetTest method testOnePair.

/**
 * Checks that a doc id set built from the single range [0, 9] yields 10 doc ids when iterated to the end.
 */
@Test
public void testOnePair() {
    List<IntPair> pairs = new ArrayList<IntPair>();
    pairs.add(Pairs.intPair(0, 9));
    SortedDocIdSet sortedDocIdSet = new SortedDocIdSet("Datasource-testCol", pairs);
    BlockDocIdIterator iterator = sortedDocIdSet.iterator();
    List<Integer> result = new ArrayList<Integer>();
    int docId;
    // Drain the iterator until it reports the end-of-stream marker
    while ((docId = iterator.next()) != Constants.EOF) {
        result.add(docId);
    }
    // TestNG convention: assertEquals(actual, expected), so failure messages label the values correctly
    Assert.assertEquals(result.size(), 10);
}
Also used : ArrayList(java.util.ArrayList) SortedDocIdSet(com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) BlockDocIdIterator(com.linkedin.pinot.core.common.BlockDocIdIterator) Test(org.testng.annotations.Test)

Example 9 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedRangeIntersectionTest method testRandom.

/**
 * Randomized check of {@code SortedRangeIntersection.intersectSortedRangeSets}.
 *
 * <p>Generates {@code numLists} lists of non-overlapping, ascending doc id ranges, computes the expected
 * intersection by brute force over every doc id, and compares it with the ranges returned by the
 * implementation. On mismatch, the random seed and the inputs are logged so the run can be reproduced.
 */
@Test
public void testRandom() {
    // 1 million docs
    int totalDocs = 1000000;
    int maxRange = 10000;
    int minRange = 1000;
    long randomSeed = System.currentTimeMillis();
    Random r = new Random(randomSeed);
    int numLists = 3;
    List<List<IntPair>> sortedRangePairsList = new ArrayList<>();
    List<Set<Integer>> rawIdSetList = new ArrayList<>();
    for (int i = 0; i < numLists; i++) {
        List<IntPair> pairs = new ArrayList<>();
        Set<Integer> rawIdSet = new HashSet<>();
        int docId = 0;
        while (docId < totalDocs) {
            // Leave a random gap before each range, then give the range a length of at least minRange
            int start = docId + r.nextInt(maxRange);
            int end = start + Math.max(minRange, r.nextInt(maxRange));
            // Ranges that would run past the last doc id are dropped rather than truncated
            if (end < totalDocs) {
                pairs.add(Pairs.intPair(start, end));
                for (int id = start; id <= end; id++) {
                    rawIdSet.add(id);
                }
            }
            docId = end + 1;
        }
        sortedRangePairsList.add(pairs);
        rawIdSetList.add(rawIdSet);
    }
    // expected intersection
    List<IntPair> expected = new ArrayList<>();
    // Start of the run of doc ids currently present in every list, or -1 when no run is open
    int tempStart = -1;
    for (int id = 0; id < totalDocs; id++) {
        boolean foundInAll = true;
        for (int i = 0; i < numLists; i++) {
            if (!rawIdSetList.get(i).contains(id)) {
                foundInAll = false;
                break;
            }
        }
        if (foundInAll) {
            if (tempStart == -1) {
                tempStart = id;
            }
        } else {
            if (tempStart != -1) {
                expected.add(Pairs.intPair(tempStart, id - 1));
                tempStart = -1;
            }
        }
    }
    // Close a run that extends through the very last doc id; without this the final expected range
    // would be dropped whenever all lists happen to contain totalDocs - 1
    if (tempStart != -1) {
        expected.add(Pairs.intPair(tempStart, totalDocs - 1));
    }
    List<IntPair> actual = SortedRangeIntersection.intersectSortedRangeSets(sortedRangePairsList);
    if (!actual.equals(expected)) {
        LOGGER.error("Actual pairs not equal to expected pairs.");
        LOGGER.error("Actual pairs: {}", actual);
        LOGGER.error("Expected pairs: {}", expected);
        LOGGER.error("Random seed: {}", randomSeed);
        LOGGER.error("Sorted range pairs list: {}", sortedRangePairsList);
        Assert.fail("Actual pairs not equal to expected pairs, random seed: " + randomSeed);
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 10 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedInvertedIndexBasedFilterOperator method nextFilterBlock.

/**
 * Builds the sorted block holding the docId ranges that match this operator's predicate.
 *
 * <p>There are two kinds of operators:
 * <ul>
 *   <li>"Additive" operators, such as EQ, IN and RANGE, build up a list of ranges and merge
 *       overlapping/adjacent ones, clipping the ranges to [startDocId; endDocId].</li>
 *   <li>"Subtractive" operators, such as NEQ and NOT IN, build up a list of ranges that do not match and
 *       derive the matching intervals by subtracting those from [startDocId; endDocId].</li>
 * </ul>
 *
 * <p>For now, we don't look at the cardinality of the column's dictionary, although we should do that if
 * someone specifies a very large list of IN/NOT IN predicates relative to the column's cardinality or a very
 * large/small range predicate relative to the cardinality. However, as adjacent ranges get merged before
 * returning the final list of ranges, the only drawback is that we use a lot of memory during the filter
 * block evaluation.
 *
 * @param BlockId not used by this implementation
 * @return the sorted block built from the matching docId ranges; it is also stored in {@code sortedBlock}
 * @throws RuntimeException if the predicate type is REGEX or any other type not handled here
 */
@Override
public BaseFilterBlock nextFilterBlock(BlockId BlockId) {
    final SortedInvertedIndexReader invertedIndex = (SortedInvertedIndexReader) dataSource.getInvertedIndex();
    final int[] dictionaryIds;
    boolean additiveRanges = true;
    switch(predicate.getType()) {
        case EQ:
        case IN:
        case RANGE:
            dictionaryIds = predicateEvaluator.getMatchingDictionaryIds();
            break;
        case NEQ:
        case NOT_IN:
            additiveRanges = false;
            dictionaryIds = predicateEvaluator.getNonMatchingDictionaryIds();
            break;
        case REGEX:
            throw new RuntimeException("Regex is not supported");
        default:
            throw new RuntimeException("Unimplemented!");
    }
    List<IntPair> pairs = collectClippedMergedRanges(invertedIndex, dictionaryIds);
    if (!additiveRanges) {
        // For subtractive operators the collected ranges are the "holes"; the matches are what surrounds them
        pairs = invertHolesWithinDocIdRange(pairs);
    }
    LOGGER.debug("Creating a Sorted Block with pairs: {}", pairs);
    sortedBlock = new SortedBlock(dataSource.getOperatorName(), pairs);
    return sortedBlock;
}

/**
 * Looks up the docId range of every given dictionary id, clips each range to [startDocId; endDocId], merges
 * mergeable neighbours and returns the remaining valid ranges in ascending dictionary id order.
 *
 * <p>NOTE(review): {@code dictionaryIds} is sorted in place, exactly as the original inline code did; confirm
 * that the predicate evaluator does not depend on the order of the array it handed out.
 *
 * @param invertedIndex reader used to resolve the docId range of a dictionary id
 * @param dictionaryIds dictionary ids to resolve; sorted in place
 * @return the merged, clipped, valid ranges; empty if there are no dictionary ids or no valid range
 */
private List<IntPair> collectClippedMergedRanges(SortedInvertedIndexReader invertedIndex, int[] dictionaryIds) {
    List<IntPair> mergedRanges = new ArrayList<IntPair>();
    if (dictionaryIds.length == 0) {
        return mergedRanges;
    }
    // Sort the dictionaryIds in ascending order, so that their respective ranges are adjacent if their
    // dictionaryIds are adjacent
    Arrays.sort(dictionaryIds);
    IntPair lastPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[0]);
    IntRanges.clip(lastPair, startDocId, endDocId);
    for (int i = 1; i < dictionaryIds.length; i++) {
        IntPair currentPair = invertedIndex.getMinMaxRangeFor(dictionaryIds[i]);
        IntRanges.clip(currentPair, startDocId, endDocId);
        // If the previous range is degenerate, just keep the current one
        if (IntRanges.isInvalid(lastPair)) {
            lastPair = currentPair;
            continue;
        }
        if (IntRanges.rangesAreMergeable(lastPair, currentPair)) {
            IntRanges.mergeIntoFirst(lastPair, currentPair);
        } else {
            // lastPair passed the validity guard above, so it can be emitted without re-checking
            mergedRanges.add(lastPair);
            lastPair = currentPair;
        }
    }
    // Add the last range if it's valid
    if (!IntRanges.isInvalid(lastPair)) {
        mergedRanges.add(lastPair);
    }
    return mergedRanges;
}

/**
 * Inverts a list of "holes" within [startDocId; endDocId] into the list of filled ranges around them.
 *
 * <ul>
 *   <li>No holes: the result is the single range [startDocId; endDocId].</li>
 *   <li>One or more holes: the result is [startDocId; firstHoleStart - 1], the gaps between consecutive
 *       holes, and [lastHoleEnd + 1; endDocId], keeping only the ranges that are valid.</li>
 * </ul>
 *
 * @param holes ranges to exclude, in the order produced by {@code collectClippedMergedRanges}
 * @return the ranges of [startDocId; endDocId] not covered by any hole
 */
private List<IntPair> invertHolesWithinDocIdRange(List<IntPair> holes) {
    List<IntPair> filledRanges = new ArrayList<>();
    if (holes.isEmpty()) {
        filledRanges.add(new IntPair(startDocId, endDocId));
        return filledRanges;
    }
    // First docId that could belong to the next filled range
    int nextFilledStart = startDocId;
    for (IntPair hole : holes) {
        IntPair filled = new IntPair(nextFilledStart, hole.getLeft() - 1);
        if (!IntRanges.isInvalid(filled)) {
            filledRanges.add(filled);
        }
        nextFilledStart = hole.getRight() + 1;
    }
    // Add the last filled area (between the last hole and endDocId)
    IntPair lastRange = new IntPair(nextFilledStart, endDocId);
    if (!IntRanges.isInvalid(lastRange)) {
        filledRanges.add(lastRange);
    }
    return filledRanges;
}
Also used : SortedInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader) ArrayList(java.util.ArrayList) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair)

Aggregations

IntPair (com.linkedin.pinot.common.utils.Pairs.IntPair)16 ArrayList (java.util.ArrayList)10 BlockDocIdIterator (com.linkedin.pinot.core.common.BlockDocIdIterator)7 SortedDocIdSet (com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet)5 Test (org.testng.annotations.Test)5 BlockDocIdSet (com.linkedin.pinot.core.common.BlockDocIdSet)2 AndDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.AndDocIdIterator)2 RangelessBitmapDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator)2 List (java.util.List)2 ImmutableRoaringBitmap (org.roaringbitmap.buffer.ImmutableRoaringBitmap)2 MutableRoaringBitmap (org.roaringbitmap.buffer.MutableRoaringBitmap)2 BitmapDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.BitmapDocIdIterator)1 ScanBasedDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.ScanBasedDocIdIterator)1 SortedInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader)1 Int2ObjectLinkedOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap)1 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1 Random (java.util.Random)1