use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
the class SortedDocIdSetTest method testTwoPair.
@Test
public void testTwoPair() {
  // Two back-to-back ranges, (90, 99) and (100, 109); the test expects 20 doc ids in total.
  List<IntPair> ranges = new ArrayList<>();
  ranges.add(Pairs.intPair(90, 99));
  ranges.add(Pairs.intPair(100, 109));
  BlockDocIdIterator docIdIterator = new SortedDocIdSet("Datasource-testCol", ranges).iterator();
  // Drain the iterator until it signals the end of the doc id stream.
  List<Integer> collected = new ArrayList<>();
  for (int current = docIdIterator.next(); current != Constants.EOF; current = docIdIterator.next()) {
    collected.add(current);
  }
  Assert.assertEquals(20, collected.size());
}
use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
the class SortedDocIdSetTest method testPairWithSameStartAndEnd.
@Test
public void testPairWithSameStartAndEnd() {
  // A single range whose start equals its end, (1, 1); the test expects exactly one doc id.
  List<IntPair> ranges = new ArrayList<>();
  ranges.add(Pairs.intPair(1, 1));
  SortedDocIdSet docIdSet = new SortedDocIdSet("Datasource-testCol", ranges);
  BlockDocIdIterator docIds = docIdSet.iterator();
  // Collect every doc id returned before the EOF marker.
  List<Integer> seen = new ArrayList<>();
  for (int next = docIds.next(); next != Constants.EOF; next = docIds.next()) {
    seen.add(next);
  }
  Assert.assertEquals(1, seen.size());
}
use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
the class SortedDocIdSetTest method testOnePair.
@Test
public void testOnePair() {
  // A single range, (0, 9); the test expects 10 doc ids to come out of the iterator.
  List<IntPair> ranges = new ArrayList<>();
  ranges.add(Pairs.intPair(0, 9));
  BlockDocIdIterator docIdIterator = new SortedDocIdSet("Datasource-testCol", ranges).iterator();
  // Pull doc ids one at a time until EOF is returned.
  List<Integer> emitted = new ArrayList<>();
  for (int value = docIdIterator.next(); value != Constants.EOF; value = docIdIterator.next()) {
    emitted.add(value);
  }
  Assert.assertEquals(10, emitted.size());
}
use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
the class SortedRangeIntersectionTest method testRandom.
/**
 * Randomized check of {@code SortedRangeIntersection.intersectSortedRangeSets}.
 *
 * <p>Generates {@code numLists} lists of sorted, non-overlapping doc id ranges, computes the expected
 * intersection by brute force (membership of every doc id in every list), and compares it with the
 * ranges returned by the implementation. The random seed is logged and included in the failure message
 * so that a failing run can be reproduced.
 */
@Test
public void testRandom() {
  // 1 million docs
  int totalDocs = 1000000;
  int maxRange = 10000;
  int minRange = 1000;
  long randomSeed = System.currentTimeMillis();
  Random r = new Random(randomSeed);
  int numLists = 3;
  List<List<IntPair>> sortedRangePairsList = new ArrayList<>();
  List<Set<Integer>> rawIdSetList = new ArrayList<>();
  for (int i = 0; i < numLists; i++) {
    List<IntPair> pairs = new ArrayList<>();
    Set<Integer> rawIdSet = new HashSet<>();
    int docId = 0;
    while (docId < totalDocs) {
      int start = docId + r.nextInt(maxRange);
      int end = start + Math.max(minRange, r.nextInt(maxRange));
      // Ranges that would run past the last doc are dropped, but a range may end exactly at totalDocs - 1.
      if (end < totalDocs) {
        pairs.add(Pairs.intPair(start, end));
        for (int id = start; id <= end; id++) {
          rawIdSet.add(id);
        }
      }
      docId = end + 1;
    }
    sortedRangePairsList.add(pairs);
    rawIdSetList.add(rawIdSet);
  }
  // expected intersection: scan every doc id and track runs of ids present in all lists
  List<IntPair> expected = new ArrayList<>();
  int tempStart = -1;
  for (int id = 0; id < totalDocs; id++) {
    boolean foundInAll = true;
    for (int i = 0; i < numLists; i++) {
      if (!rawIdSetList.get(i).contains(id)) {
        foundInAll = false;
        break;
      }
    }
    if (foundInAll) {
      if (tempStart == -1) {
        tempStart = id;
      }
    } else {
      if (tempStart != -1) {
        expected.add(Pairs.intPair(tempStart, id - 1));
        tempStart = -1;
      }
    }
  }
  // A run that is still open when the scan ends (all lists contain totalDocs - 1) has no following
  // non-matching id to close it, so it must be flushed here; otherwise the expected list would miss
  // the final range and the test would fail spuriously.
  if (tempStart != -1) {
    expected.add(Pairs.intPair(tempStart, totalDocs - 1));
  }
  List<IntPair> actual = SortedRangeIntersection.intersectSortedRangeSets(sortedRangePairsList);
  if (!actual.equals(expected)) {
    LOGGER.error("Actual pairs not equal to expected pairs.");
    LOGGER.error("Actual pairs: {}", actual);
    LOGGER.error("Expected pairs: {}", expected);
    LOGGER.error("Random seed: {}", randomSeed);
    LOGGER.error("Sorted range pairs list: {}", sortedRangePairsList);
    Assert.fail("Actual pairs not equal to expected pairs, random seed: " + randomSeed);
  }
}
use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
the class SortedInvertedIndexBasedFilterOperator method nextFilterBlock.
/**
 * Builds the block of docId ranges selected by the predicate, using the sorted inverted index of the
 * data source.
 *
 * <p>EQ, IN and RANGE collect the ranges of the matching dictionary ids. NEQ and NOT_IN collect the
 * ranges of the non-matching dictionary ids and then take the complement of those ranges within
 * {@code startDocId} .. {@code endDocId}.
 *
 * @param BlockId block identifier; not read by this implementation
 * @return the newly created sorted block, which is also stored in the {@code sortedBlock} field
 * @throws RuntimeException if the predicate type is REGEX or any other type not handled here
 */
@Override
public BaseFilterBlock nextFilterBlock(BlockId BlockId) {
  final SortedInvertedIndexReader sortedIndex = (SortedInvertedIndexReader) dataSource.getInvertedIndex();

  // Pick the dictionary ids to look up. For "subtractive" predicates the ids describe what must be
  // excluded, so the ranges gathered below are holes rather than matches.
  //
  // The cardinality of the column's dictionary is not taken into account here; every requested
  // dictionary id is looked up and neighbouring ranges are merged before the block is created.
  final int[] dictionaryIds;
  final boolean subtractive;
  switch (predicate.getType()) {
    case EQ:
    case IN:
    case RANGE:
      subtractive = false;
      dictionaryIds = predicateEvaluator.getMatchingDictionaryIds();
      break;
    case NEQ:
    case NOT_IN:
      subtractive = true;
      dictionaryIds = predicateEvaluator.getNonMatchingDictionaryIds();
      break;
    case REGEX:
      throw new RuntimeException("Regex is not supported");
    default:
      throw new RuntimeException("Unimplemented!");
  }

  List<IntPair> mergedRanges = new ArrayList<IntPair>();
  if (dictionaryIds.length > 0) {
    // Ascending dictionary ids make the ranges of neighbouring ids come out next to each other.
    Arrays.sort(dictionaryIds);

    // "pending" is the range currently being grown; it is only emitted once a later range cannot be
    // merged into it (or when the loop ends).
    IntPair pending = sortedIndex.getMinMaxRangeFor(dictionaryIds[0]);
    IntRanges.clip(pending, startDocId, endDocId);
    for (int idx = 1; idx < dictionaryIds.length; idx++) {
      IntPair candidate = sortedIndex.getMinMaxRangeFor(dictionaryIds[idx]);
      IntRanges.clip(candidate, startDocId, endDocId);

      if (IntRanges.isInvalid(pending)) {
        // Nothing usable accumulated so far: restart from the candidate.
        pending = candidate;
      } else if (IntRanges.rangesAreMergeable(pending, candidate)) {
        IntRanges.mergeIntoFirst(pending, candidate);
      } else {
        // The candidate cannot extend the pending range: emit the pending one and move on.
        if (!IntRanges.isInvalid(pending)) {
          mergedRanges.add(pending);
        }
        pending = candidate;
      }
    }

    // Emit whatever is left over, provided it is a valid range.
    if (!IntRanges.isInvalid(pending)) {
      mergedRanges.add(pending);
    }
  }

  final List<IntPair> resultRanges;
  if (!subtractive) {
    resultRanges = mergedRanges;
  } else if (mergedRanges.isEmpty()) {
    // No holes at all: the whole [startDocId; endDocId] range matches.
    resultRanges = new ArrayList<>();
    resultRanges.add(new IntPair(startDocId, endDocId));
  } else {
    // Walk the holes with a cursor marking where the next filled area starts. This yields the area
    // in front of the first hole, the areas between consecutive holes, and finally the area after
    // the last hole. Areas that turn out to be invalid ranges are skipped.
    resultRanges = new ArrayList<>();
    int filledStart = startDocId;
    for (IntPair hole : mergedRanges) {
      IntPair filled = new IntPair(filledStart, hole.getLeft() - 1);
      if (!IntRanges.isInvalid(filled)) {
        resultRanges.add(filled);
      }
      filledStart = hole.getRight() + 1;
    }
    IntPair trailing = new IntPair(filledStart, endDocId);
    if (!IntRanges.isInvalid(trailing)) {
      resultRanges.add(trailing);
    }
  }

  LOGGER.debug("Creating a Sorted Block with pairs: {}", resultRanges);
  sortedBlock = new SortedBlock(dataSource.getOperatorName(), resultRanges);
  return sortedBlock;
}
Aggregations