Search in sources :

Example 11 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class OrDocIdIterator method next.

/**
 * Moves this iterator to its next document id and returns it.
 *
 * <p>Queue entries whose doc id is at or before the current one are discarded, each child
 * iterator that no longer has an entry in the queue is advanced to at least
 * {@code currentDocId + 1}, and the doc id at the head of the queue becomes the new current
 * doc id. The elapsed wall-clock time of a non-trivial call is accumulated into
 * {@code timeMeasure}.
 *
 * @return the new current doc id, or {@code Constants.EOF} when the queue is empty after
 *         refilling (or the iterator was already at EOF)
 * @throws RuntimeException if a child iterator returns a doc id outside
 *         {@code [minDocId, maxDocId]}
 */
@Override
public int next() {
    final long startMillis = System.currentTimeMillis();
    // Already exhausted: return immediately (this path is not added to timeMeasure).
    if (currentDocId == Constants.EOF) {
        return currentDocId;
    }
    // Discard every queue head that is not beyond the current doc id and mark the owning
    // iterator so that it gets advanced again below.
    while (queue.size() > 0 && queue.peek().getLeft() <= currentDocId) {
        int consumedIterator = queue.remove().getRight();
        iteratorIsInQueue[consumedIterator] = false;
    }
    currentDocId++;
    // Refill the queue from each iterator that currently has no entry in it.
    for (int idx = 0; idx < docIdIterators.length; idx++) {
        if (iteratorIsInQueue[idx]) {
            continue;
        }
        final int candidate = docIdIterators[idx].advance(currentDocId);
        if (candidate != Constants.EOF) {
            final boolean outsideRange = candidate > maxDocId || candidate < minDocId;
            if (outsideRange && candidate >= currentDocId) {
                throw new RuntimeException("next Doc : " + candidate + " should never crossing the range : [ " + minDocId + ", " + maxDocId + " ]");
            }
            queue.add(new IntPair(candidate, idx));
        }
        // The flag is set even when the iterator returned EOF (nothing was enqueued); within
        // this method that means the iterator is never advanced again.
        iteratorIsInQueue[idx] = true;
    }
    currentDocId = (queue.size() > 0) ? queue.peek().getLeft() : Constants.EOF;
    timeMeasure.addAndGet(System.currentTimeMillis() - startMillis);
    return currentDocId;
}
Also used : IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair)

Example 12 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class OffHeapStarTreeBuilder method splitLeafNodesOnTimeColumn.

/**
 * Helper method that walks the star tree breadth-first from {@code starTreeRootIndexNode} and,
 * when {@code timeColumnName} is set, does the following for each leaf node:
 * <ul>
 *   <li>Re-sorts the leaf's document range using the sort order returned by
 *       {@code moveColumnInSortOrder(timeColumnName, getSortOrder(), level)}.</li>
 *   <li>Groups that range by the time column and creates one child node per time value,
 *       carrying the start/end document ids of its group.</li>
 * </ul>
 * Children created here are not enqueued, so they are not split again by this pass.
 *
 * <p>The data sorter is closed in a {@code finally} block so it is released even when
 * sorting or grouping fails.
 *
 * <p>NOTE(review): earlier documentation also mentioned adding a record with aggregated data
 * for each leaf node; no such step is visible in this method - confirm where it happens.
 *
 * @throws Exception if creating, using or closing the data sorter fails
 */
private void splitLeafNodesOnTimeColumn() throws Exception {
    Queue<StarTreeIndexNode> nodes = new LinkedList<>();
    nodes.add(starTreeRootIndexNode);
    StarTreeDataSorter dataSorter = new StarTreeDataSorter(dataFile, dimensionSizeBytes, metricSizeBytes);
    try {
        while (!nodes.isEmpty()) {
            StarTreeIndexNode node = nodes.remove();
            if (!node.isLeaf()) {
                // Interior node: keep descending.
                Iterator<StarTreeIndexNode> childrenIterator = node.getChildrenIterator();
                while (childrenIterator.hasNext()) {
                    nodes.add(childrenIterator.next());
                }
                continue;
            }
            // If we have time column, split on time column, helps in time based filtering
            if (timeColumnName == null) {
                continue;
            }
            int level = node.getLevel();
            int[] newSortOrder = moveColumnInSortOrder(timeColumnName, getSortOrder(), level);
            int startDocId = node.getStartDocumentId();
            int endDocId = node.getEndDocumentId();
            dataSorter.sort(startDocId, endDocId, newSortOrder);
            int timeColIndex = dimensionNameToIndexMap.get(timeColumnName);
            Map<Integer, IntPair> timeColumnRangeMap = dataSorter.groupByIntColumnCount(startDocId, endDocId, timeColIndex);
            node.setChildDimensionName(timeColIndex);
            node.setChildren(new HashMap<Integer, StarTreeIndexNode>());
            // Iterate entries directly instead of looking every key up a second time.
            for (Map.Entry<Integer, IntPair> entry : timeColumnRangeMap.entrySet()) {
                int timeValue = entry.getKey();
                IntPair range = entry.getValue();
                StarTreeIndexNode child = new StarTreeIndexNode();
                child.setDimensionName(timeColIndex);
                child.setDimensionValue(timeValue);
                child.setParent(node);
                child.setLevel(level + 1);
                child.setStartDocumentId(range.getLeft());
                child.setEndDocumentId(range.getRight());
                node.addChild(child, timeValue);
            }
        }
    } finally {
        // Always release the sorter, including on failure part-way through the traversal.
        dataSorter.close();
    }
}
Also used : IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) LinkedList(java.util.LinkedList)

Example 13 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class OffHeapStarTreeBuilder method constructStarTree.

/**
 * Recursively builds the star tree below {@code node} for the document range
 * {@code startDocId}..{@code endDocId} of {@code file}, splitting on the dimension at
 * position {@code level} of {@code dimensionsSplitOrder}.
 *
 * <p>One child is created per distinct value of the split dimension; a child whose range
 * spans more than {@code maxLeafRecords} documents is split further. Unless the dimension is
 * listed in {@code skipStarNodeCreationForDimensions}, a star child
 * ({@code StarTreeIndexNodeInterf.ALL}) is added as well: its rows come from
 * {@code uniqueCombinations(...)}, are appended to the aggregate buffer, and are split
 * further when there are at least {@code maxLeafRecords} of them.
 *
 * <p>This method does not set start/end document ids on {@code node} itself, only on children
 * for which the recursive call added no documents (or was not made).
 *
 * @param node       node whose children are created
 * @param startDocId start of the document range handled by {@code node}
 * @param endDocId   end of the document range handled by {@code node}
 * @param level      index into {@code dimensionsSplitOrder} of the dimension to split on
 * @param file       file the documents are grouped from
 * @return number of rows appended to the aggregate buffer by this call and its recursive calls
 * @throws Exception propagated from grouping, sorting, buffer or recursive operations
 */
private int constructStarTree(StarTreeIndexNode node, int startDocId, int endDocId, int level, File file) throws Exception {
    // All split dimensions have been used up: nothing more to split on.
    if (level == dimensionsSplitOrder.size()) {
        return 0;
    }
    final String splitDimensionName = dimensionsSplitOrder.get(level);
    final Integer splitDimensionId = dimensionNameToIndexMap.get(splitDimensionName);
    LOG.debug("Building tree at level:{} using file:{} from startDoc:{} endDocId:{} splitting on dimension:{}", level, file.getName(), startDocId, endDocId, splitDimensionName);
    final Map<Integer, IntPair> sortGroupBy = groupBy(startDocId, endDocId, splitDimensionId, file);
    LOG.debug("Group stats:{}", sortGroupBy);
    node.setChildDimensionName(splitDimensionId);
    node.setChildren(new HashMap<Integer, StarTreeIndexNode>());

    int docsAdded = 0;
    for (Map.Entry<Integer, IntPair> group : sortGroupBy.entrySet()) {
        final int childDimensionValue = group.getKey();
        final IntPair range = group.getValue();

        StarTreeIndexNode child = new StarTreeIndexNode();
        child.setDimensionName(splitDimensionId);
        child.setDimensionValue(childDimensionValue);
        child.setParent(node);
        child.setLevel(node.getLevel() + 1);
        // n.b. Nodes are numbered later using BFS, once the tree is fully split.
        node.addChild(child, childDimensionValue);

        // Recurse only when the group is larger than a leaf may hold.
        final int docsFromChild = (range.getRight() - range.getLeft() > maxLeafRecords)
            ? constructStarTree(child, range.getLeft(), range.getRight(), level + 1, file)
            : 0;
        docsAdded += docsFromChild;
        // Either range <= maxLeafRecords, or the recursion did not split further (last level).
        if (docsFromChild == 0) {
            child.setStartDocumentId(range.getLeft());
            child.setEndDocumentId(range.getRight());
        }
    }

    // Stop here when no star node is wanted for this dimension.
    final boolean skipStarNode = skipStarNodeCreationForDimensions != null
        && skipStarNodeCreationForDimensions.contains(splitDimensionName);
    if (skipStarNode) {
        return docsAdded;
    }

    // Star node creation.
    StarTreeIndexNode starChild = new StarTreeIndexNode();
    starChild.setDimensionName(splitDimensionId);
    starChild.setDimensionValue(StarTreeIndexNodeInterf.ALL);
    starChild.setParent(node);
    starChild.setLevel(node.getLevel() + 1);
    // n.b. Nodes are numbered later using BFS, once the tree is fully split.
    node.addChild(starChild, StarTreeIndexNodeInterf.ALL);

    Iterator<Pair<DimensionBuffer, MetricBuffer>> combinations = uniqueCombinations(startDocId, endDocId, file, splitDimensionId);
    // Captured before any row is appended; used as the first doc id of the star child's range.
    final int startOffset = rawRecordCount + aggRecordCount;
    int rowsAdded = 0;
    while (combinations.hasNext()) {
        Pair<DimensionBuffer, MetricBuffer> row = combinations.next();
        DimensionBuffer dimension = row.getLeft();
        LOG.debug("Adding row:{}", dimension);
        appendToAggBuffer(dimension, row.getRight());
        rowsAdded++;
    }
    docsAdded += rowsAdded;
    LOG.debug("Added {} additional records at level {}", rowsAdded, level);
    dataBuffer.flush();

    int docsFromStarChild = 0;
    if (rowsAdded >= maxLeafRecords) {
        sort(dataFile, startOffset, startOffset + rowsAdded);
        docsFromStarChild = constructStarTree(starChild, startOffset, startOffset + rowsAdded, level + 1, dataFile);
        docsAdded += docsFromStarChild;
    }
    // Either rowsAdded < maxLeafRecords, or the recursion did not split further (last level).
    if (docsFromStarChild == 0) {
        starChild.setStartDocumentId(startOffset);
        starChild.setEndDocumentId(startOffset + rowsAdded);
    }
    return docsAdded;
}
Also used : IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) Pair(org.apache.commons.lang3.tuple.Pair)

Example 14 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedRangeIntersection method intersectSortedRangeSets.

/**
 * Intersects several range sets, each given as a list of {@code IntPair} ranges
 * (left = start, right = end, both treated as inclusive by the containment check below).
 *
 * <p>NOTE(review): the algorithm appears to assume that every inner list is sorted and
 * non-overlapping - confirm against callers.
 *
 * @param sortedRangeSetList the range sets to intersect
 * @return an empty list when the input is {@code null}, empty, or contains an empty range
 *         set; the single input list itself (not a copy) when exactly one range set is
 *         given; otherwise a new list of intersection ranges in which a range starting
 *         exactly one past the previous range's end is merged into it
 */
public static List<IntPair> intersectSortedRangeSets(List<List<IntPair>> sortedRangeSetList) {
    if (sortedRangeSetList == null || sortedRangeSetList.isEmpty()) {
        return Collections.emptyList();
    }
    final int numSets = sortedRangeSetList.size();
    if (numSets == 1) {
        return sortedRangeSetList.get(0);
    }
    // An empty range set makes the whole intersection empty.
    for (List<IntPair> candidate : sortedRangeSetList) {
        if (candidate.isEmpty()) {
            return Collections.emptyList();
        }
    }

    // cursors[k] is the index of the range currently considered in range set k (starts at 0).
    final int[] cursors = new int[numSets];
    int maxHead = -1;
    int maxHeadIndex = -1;
    final List<IntPair> result = new ArrayList<IntPair>();

    while (true) {
        // Step 1: find the largest start among the ranges under the cursors.
        for (int k = 0; k < numSets; k++) {
            int head = sortedRangeSetList.get(k).get(cursors[k]).getLeft();
            if (head > maxHead) {
                maxHead = head;
                maxHeadIndex = k;
            }
        }

        // Step 2: advance every other cursor until its range contains maxHead. If a range
        // starting beyond maxHead is met, it becomes the new maxHead and the scan restarts.
        int scan = 0;
        while (scan < numSets) {
            if (scan == maxHeadIndex) {
                scan++;
                continue;
            }
            final List<IntPair> ranges = sortedRangeSetList.get(scan);
            boolean containsMaxHead = false;
            boolean maxHeadRaised = false;
            while (cursors[scan] < ranges.size()) {
                IntPair range = ranges.get(cursors[scan]);
                if (range.getLeft() <= maxHead && maxHead <= range.getRight()) {
                    containsMaxHead = true;
                    break;
                }
                if (range.getLeft() > maxHead) {
                    maxHead = range.getLeft();
                    maxHeadIndex = scan;
                    maxHeadRaised = true;
                    break;
                }
                cursors[scan]++;
            }
            if (maxHeadRaised) {
                // new maxHead found: re-check all range sets from the first one
                scan = 0;
                continue;
            }
            if (!containsMaxHead) {
                // This range set is exhausted: no further intersections are possible.
                return result;
            }
            scan++;
        }

        // Step 3: every cursor's range contains maxHead, so narrow down to the common part.
        IntPair first = sortedRangeSetList.get(0).get(cursors[0]);
        IntPair intersection = Pairs.intPair(first.getLeft(), first.getRight());
        for (int k = 1; k < numSets; k++) {
            IntPair other = sortedRangeSetList.get(k).get(cursors[k]);
            int start = Math.max(intersection.getLeft(), other.getLeft());
            int end = Math.min(intersection.getRight(), other.getRight());
            intersection.setLeft(start);
            intersection.setRight(end);
        }

        // Step 4: extend the previous result range when the new one is contiguous with it,
        // otherwise append the new range.
        IntPair previous = result.isEmpty() ? null : result.get(result.size() - 1);
        if (previous != null && intersection.getLeft() == previous.getRight() + 1) {
            previous.setRight(intersection.getRight());
        } else {
            result.add(intersection);
        }

        // Step 5: step past every range that ends where the intersection ends; finish as
        // soon as one range set has no ranges left.
        for (int k = 0; k < numSets; k++) {
            final List<IntPair> ranges = sortedRangeSetList.get(k);
            if (ranges.get(cursors[k]).getRight() != intersection.getRight()) {
                continue;
            }
            cursors[k]++;
            if (cursors[k] == ranges.size()) {
                return result;
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair)

Example 15 with IntPair

use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.

the class SortedDocIdSetTest method testEmpty.

/**
 * Checks that iterating a {@code SortedDocIdSet} built from an empty list of ranges yields
 * no doc ids before {@code Constants.EOF}.
 */
@Test
public void testEmpty() {
    List<IntPair> noRanges = new ArrayList<IntPair>();
    BlockDocIdIterator docIds = new SortedDocIdSet("Datasource-testCol", noRanges).iterator();
    List<Integer> collected = new ArrayList<Integer>();
    // Drain the iterator until it signals EOF.
    for (int docId = docIds.next(); docId != Constants.EOF; docId = docIds.next()) {
        collected.add(docId);
    }
    Assert.assertTrue(collected.isEmpty(), "Expected empty result set but got:" + collected);
}
Also used : ArrayList(java.util.ArrayList) SortedDocIdSet(com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair) BlockDocIdIterator(com.linkedin.pinot.core.common.BlockDocIdIterator) Test(org.testng.annotations.Test)

Aggregations

IntPair (com.linkedin.pinot.common.utils.Pairs.IntPair)16 ArrayList (java.util.ArrayList)10 BlockDocIdIterator (com.linkedin.pinot.core.common.BlockDocIdIterator)7 SortedDocIdSet (com.linkedin.pinot.core.operator.docidsets.SortedDocIdSet)5 Test (org.testng.annotations.Test)5 BlockDocIdSet (com.linkedin.pinot.core.common.BlockDocIdSet)2 AndDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.AndDocIdIterator)2 RangelessBitmapDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator)2 List (java.util.List)2 ImmutableRoaringBitmap (org.roaringbitmap.buffer.ImmutableRoaringBitmap)2 MutableRoaringBitmap (org.roaringbitmap.buffer.MutableRoaringBitmap)2 BitmapDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.BitmapDocIdIterator)1 ScanBasedDocIdIterator (com.linkedin.pinot.core.operator.dociditerators.ScanBasedDocIdIterator)1 SortedInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.SortedInvertedIndexReader)1 Int2ObjectLinkedOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap)1 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1 Random (java.util.Random)1