Use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
The class OrDocIdIterator, method next().
@Override
public int next() {
  long start = System.currentTimeMillis();
  if (currentDocId == Constants.EOF) {
    return currentDocId;
  }
  // Remove entries from the queue whose head doc id has already been passed,
  // and mark their iterators as no longer queued.
  while (queue.size() > 0 && queue.peek().getLeft() <= currentDocId) {
    IntPair pair = queue.remove();
    iteratorIsInQueue[pair.getRight()] = false;
  }
  currentDocId++;
  // Grab the next value from each iterator that is not already in the queue.
  for (int i = 0; i < docIdIterators.length; i++) {
    if (!iteratorIsInQueue[i]) {
      int nextDocId = docIdIterators[i].advance(currentDocId);
      if (nextDocId != Constants.EOF) {
        if (!(nextDocId <= maxDocId && nextDocId >= minDocId) && nextDocId >= currentDocId) {
          throw new RuntimeException(
              "next doc id " + nextDocId + " should never cross the range [" + minDocId + ", " + maxDocId + "]");
        }
        queue.add(new IntPair(nextDocId, i));
      }
      iteratorIsInQueue[i] = true;
    }
  }
  // The smallest queued head becomes the next doc id to emit.
  if (queue.size() > 0) {
    currentDocId = queue.peek().getLeft();
  } else {
    currentDocId = Constants.EOF;
  }
  long end = System.currentTimeMillis();
  timeMeasure.addAndGet(end - start);
  return currentDocId;
}
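For context, the loop above is a k-way merge over several sorted doc-id streams, keyed on the smallest queued head so that the OR of the streams comes out in sorted order. The following standalone sketch (not Pinot code; the OrMergeSketch class and the int[][] inputs are invented for illustration, and doc ids are assumed non-negative) shows the same idea with a plain java.util.PriorityQueue:

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

public class OrMergeSketch {
  // Merge several individually sorted doc-id lists into one sorted, de-duplicated list.
  public static List<Integer> orMerge(int[][] sortedDocIdLists) {
    // Each queue entry is {current doc id, index of the source list}; the heap is keyed on the doc id.
    PriorityQueue<int[]> queue = new PriorityQueue<>((a, b) -> Integer.compare(a[0], b[0]));
    int[] positions = new int[sortedDocIdLists.length];
    for (int i = 0; i < sortedDocIdLists.length; i++) {
      if (sortedDocIdLists[i].length > 0) {
        queue.add(new int[] { sortedDocIdLists[i][0], i });
      }
    }
    List<Integer> result = new ArrayList<>();
    int lastDocId = -1; // assumes doc ids are >= 0
    while (!queue.isEmpty()) {
      int[] head = queue.remove();
      int docId = head[0];
      int listIndex = head[1];
      if (docId != lastDocId) { // skip doc ids already emitted via another list
        result.add(docId);
        lastDocId = docId;
      }
      // Advance the list the head came from and re-insert its next doc id, if any.
      if (++positions[listIndex] < sortedDocIdLists[listIndex].length) {
        queue.add(new int[] { sortedDocIdLists[listIndex][positions[listIndex]], listIndex });
      }
    }
    return result;
  }
}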
Use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
The class OffHeapStarTreeBuilder, method splitLeafNodesOnTimeColumn().
/**
 * Helper method that visits each leaf node and does the following:
 * - Re-orders the doc ids corresponding to the leaf node with respect to the time column.
 * - Creates child nodes for each time value under this leaf node.
 * - Adds a new record with aggregated data for this leaf node.
 * @throws Exception
 */
private void splitLeafNodesOnTimeColumn() throws Exception {
  Queue<StarTreeIndexNode> nodes = new LinkedList<>();
  nodes.add(starTreeRootIndexNode);
  StarTreeDataSorter dataSorter = new StarTreeDataSorter(dataFile, dimensionSizeBytes, metricSizeBytes);
  while (!nodes.isEmpty()) {
    StarTreeIndexNode node = nodes.remove();
    if (node.isLeaf()) {
      // If we have a time column, split on it; this helps with time-based filtering.
      if (timeColumnName != null) {
        int level = node.getLevel();
        int[] newSortOrder = moveColumnInSortOrder(timeColumnName, getSortOrder(), level);
        int startDocId = node.getStartDocumentId();
        int endDocId = node.getEndDocumentId();
        dataSorter.sort(startDocId, endDocId, newSortOrder);
        int timeColIndex = dimensionNameToIndexMap.get(timeColumnName);
        Map<Integer, IntPair> timeColumnRangeMap = dataSorter.groupByIntColumnCount(startDocId, endDocId, timeColIndex);
        node.setChildDimensionName(timeColIndex);
        node.setChildren(new HashMap<Integer, StarTreeIndexNode>());
        // Create one child node per distinct time value, covering that value's doc-id range.
        for (int timeValue : timeColumnRangeMap.keySet()) {
          IntPair range = timeColumnRangeMap.get(timeValue);
          StarTreeIndexNode child = new StarTreeIndexNode();
          child.setDimensionName(timeColIndex);
          child.setDimensionValue(timeValue);
          child.setParent(node);
          child.setLevel(node.getLevel() + 1);
          child.setStartDocumentId(range.getLeft());
          child.setEndDocumentId(range.getRight());
          node.addChild(child, timeValue);
        }
      }
    } else {
      // Non-leaf node: keep walking the tree breadth-first.
      Iterator<StarTreeIndexNode> childrenIterator = node.getChildrenIterator();
      while (childrenIterator.hasNext()) {
        nodes.add(childrenIterator.next());
      }
    }
  }
  dataSorter.close();
}
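The moveColumnInSortOrder helper called above is not shown in this snippet. One plausible sketch of what such a helper might do, purely as an assumption for illustration: the sort order is taken to be an int[] of dimension indices, the column is passed here as an index rather than resolved from its name via dimensionNameToIndexMap, and the column is assumed to currently sit at or after position level. The real builder's implementation may differ.

// Hypothetical sketch, not the project's actual helper.
private int[] moveColumnInSortOrder(int columnIndex, int[] sortOrder, int level) {
  int[] newSortOrder = sortOrder.clone();
  // Locate the column in the current sort order.
  int current = -1;
  for (int i = 0; i < newSortOrder.length; i++) {
    if (newSortOrder[i] == columnIndex) {
      current = i;
      break;
    }
  }
  // Shift the entries between 'level' and the column's current slot one step to the right,
  // then drop the column into position 'level' (assumes current >= level).
  for (int i = current; i > level; i--) {
    newSortOrder[i] = newSortOrder[i - 1];
  }
  newSortOrder[level] = columnIndex;
  return newSortOrder;
}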
Use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
The class OffHeapStarTreeBuilder, method constructStarTree().
private int constructStarTree(StarTreeIndexNode node, int startDocId, int endDocId, int level, File file) throws Exception {
  // node.setStartDocumentId(startDocId);
  int docsAdded = 0;
  if (level == dimensionsSplitOrder.size()) {
    return 0;
  }
  String splitDimensionName = dimensionsSplitOrder.get(level);
  Integer splitDimensionId = dimensionNameToIndexMap.get(splitDimensionName);
  LOG.debug("Building tree at level:{} using file:{} from startDoc:{} endDocId:{} splitting on dimension:{}", level,
      file.getName(), startDocId, endDocId, splitDimensionName);
  Map<Integer, IntPair> sortGroupBy = groupBy(startDocId, endDocId, splitDimensionId, file);
  LOG.debug("Group stats:{}", sortGroupBy);
  node.setChildDimensionName(splitDimensionId);
  node.setChildren(new HashMap<Integer, StarTreeIndexNode>());
  // Create one child per distinct value of the split dimension.
  for (int childDimensionValue : sortGroupBy.keySet()) {
    StarTreeIndexNode child = new StarTreeIndexNode();
    child.setDimensionName(splitDimensionId);
    child.setDimensionValue(childDimensionValue);
    child.setParent(node);
    child.setLevel(node.getLevel() + 1);
    // n.b. We will number the nodes later using BFS after fully split
    // Add child to parent
    node.addChild(child, childDimensionValue);
    int childDocs = 0;
    IntPair range = sortGroupBy.get(childDimensionValue);
    // Recurse only when the child's doc range holds more than maxLeafRecords documents.
    if (range.getRight() - range.getLeft() > maxLeafRecords) {
      childDocs = constructStarTree(child, range.getLeft(), range.getRight(), level + 1, file);
      docsAdded += childDocs;
    }
    // Either range <= maxLeafRecords, or we did not split further (last level).
    if (childDocs == 0) {
      child.setStartDocumentId(range.getLeft());
      child.setEndDocumentId(range.getRight());
    }
  }
  // Return if the star node does not need to be created for this dimension.
  if (skipStarNodeCreationForDimensions != null && skipStarNodeCreationForDimensions.contains(splitDimensionName)) {
    return docsAdded;
  }
  // Create the star node.
  StarTreeIndexNode starChild = new StarTreeIndexNode();
  starChild.setDimensionName(splitDimensionId);
  starChild.setDimensionValue(StarTreeIndexNodeInterf.ALL);
  starChild.setParent(node);
  starChild.setLevel(node.getLevel() + 1);
  // n.b. We will number the nodes later using BFS after fully split
  // Add child to parent
  node.addChild(starChild, StarTreeIndexNodeInterf.ALL);
  // Aggregate records over unique dimension combinations (ignoring the split dimension)
  // and append them as the star node's documents.
  Iterator<Pair<DimensionBuffer, MetricBuffer>> iterator = uniqueCombinations(startDocId, endDocId, file, splitDimensionId);
  int rowsAdded = 0;
  int startOffset = rawRecordCount + aggRecordCount;
  while (iterator.hasNext()) {
    Pair<DimensionBuffer, MetricBuffer> next = iterator.next();
    DimensionBuffer dimension = next.getLeft();
    MetricBuffer metricsHolder = next.getRight();
    LOG.debug("Adding row:{}", dimension);
    appendToAggBuffer(dimension, metricsHolder);
    rowsAdded++;
  }
  docsAdded += rowsAdded;
  LOG.debug("Added {} additional records at level {}", rowsAdded, level);
  // flush
  dataBuffer.flush();
  int childDocs = 0;
  if (rowsAdded >= maxLeafRecords) {
    sort(dataFile, startOffset, startOffset + rowsAdded);
    childDocs = constructStarTree(starChild, startOffset, startOffset + rowsAdded, level + 1, dataFile);
    docsAdded += childDocs;
  }
  // Either rowsAdded < maxLeafRecords, or we did not split further (last level).
  if (childDocs == 0) {
    starChild.setStartDocumentId(startOffset);
    starChild.setEndDocumentId(startOffset + rowsAdded);
  }
  // node.setEndDocumentId(endDocId + docsAdded);
  return docsAdded;
}
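The groupBy helper used above is not shown here. As a rough illustration of the Map<Integer, IntPair> shape it returns, the following hypothetical standalone sketch (operating on an in-memory int[] column rather than the builder's data file, and treating ranges as [start, end) as the size check range.getRight() - range.getLeft() above suggests) computes one contiguous doc-id range per distinct value of a column that is already sorted within the given span:

import java.util.LinkedHashMap;
import java.util.Map;
import com.linkedin.pinot.common.utils.Pairs;
import com.linkedin.pinot.common.utils.Pairs.IntPair;

public class GroupBySketch {
  // Returns value -> [start, end) doc-id range, assuming the column values in
  // [startDocId, endDocId) are already sorted on this column.
  static Map<Integer, IntPair> groupBySortedColumn(int[] sortedValues, int startDocId, int endDocId) {
    Map<Integer, IntPair> ranges = new LinkedHashMap<>();
    int rangeStart = startDocId;
    for (int docId = startDocId + 1; docId <= endDocId; docId++) {
      // Close the current range when the value changes or the end of the span is reached.
      if (docId == endDocId || sortedValues[docId] != sortedValues[rangeStart]) {
        ranges.put(sortedValues[rangeStart], Pairs.intPair(rangeStart, docId));
        rangeStart = docId;
      }
    }
    return ranges;
  }
}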
Use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
The class SortedRangeIntersection, method intersectSortedRangeSets().
public static List<IntPair> intersectSortedRangeSets(List<List<IntPair>> sortedRangeSetList) {
  if (sortedRangeSetList == null || sortedRangeSetList.size() == 0) {
    return Collections.emptyList();
  }
  if (sortedRangeSetList.size() == 1) {
    return sortedRangeSetList.get(0);
  }
  // If any list is empty, the intersection is empty.
  for (List<IntPair> rangeSet : sortedRangeSetList) {
    if (rangeSet.size() == 0) {
      return Collections.emptyList();
    }
  }
  int[] currentRangeSetIndex = new int[sortedRangeSetList.size()];
  Arrays.fill(currentRangeSetIndex, 0);
  int maxHead = -1;
  int maxHeadIndex = -1;
  boolean reachedEnd = false;
  List<IntPair> result = new ArrayList<IntPair>();
  while (!reachedEnd) {
    // Find the maximum head among the ranges the current pointers point to.
    for (int i = 0; i < sortedRangeSetList.size(); i++) {
      int head = sortedRangeSetList.get(i).get(currentRangeSetIndex[i]).getLeft();
      if (head > maxHead) {
        maxHead = head;
        maxHeadIndex = i;
      }
    }
    // Move all pointers forward until the range they point to contains maxHead.
    for (int i = 0; i < sortedRangeSetList.size(); i++) {
      if (i == maxHeadIndex) {
        continue;
      }
      boolean found = false;
      while (!found && currentRangeSetIndex[i] < sortedRangeSetList.get(i).size()) {
        IntPair range = sortedRangeSetList.get(i).get(currentRangeSetIndex[i]);
        if (maxHead >= range.getLeft() && maxHead <= range.getRight()) {
          found = true;
          break;
        }
        if (range.getLeft() > maxHead) {
          // This list starts past maxHead: adopt the new maximum and restart the pointer scan.
          maxHead = range.getLeft();
          maxHeadIndex = i;
          i = -1;
          break;
        }
        currentRangeSetIndex[i] = currentRangeSetIndex[i] + 1;
      }
      // A new maxHead was found above; restart the pointer-moving loop from the first list.
      if (i == -1) {
        continue;
      }
      if (!found) {
        reachedEnd = true;
        break;
      }
    }
    if (reachedEnd) {
      break;
    }
    // There is definitely some intersection possible here.
    IntPair intPair = sortedRangeSetList.get(0).get(currentRangeSetIndex[0]);
    IntPair intersection = Pairs.intPair(intPair.getLeft(), intPair.getRight());
    for (int i = 1; i < sortedRangeSetList.size(); i++) {
      IntPair pair = sortedRangeSetList.get(i).get(currentRangeSetIndex[i]);
      int start = Math.max(intersection.getLeft(), pair.getLeft());
      int end = Math.min(intersection.getRight(), pair.getRight());
      intersection.setLeft(start);
      intersection.setRight(end);
    }
    if (result.size() > 0) {
      // If the new range is contiguous with the previous one, merge them.
      IntPair prevIntersection = result.get(result.size() - 1);
      if (intersection.getLeft() == prevIntersection.getRight() + 1) {
        prevIntersection.setRight(intersection.getRight());
      } else {
        result.add(intersection);
      }
    } else {
      result.add(intersection);
    }
    // Move the pointers forward for range sets whose current tail equals the intersection tail.
    for (int i = 0; i < sortedRangeSetList.size(); i++) {
      IntPair pair = sortedRangeSetList.get(i).get(currentRangeSetIndex[i]);
      if (pair.getRight() == intersection.getRight()) {
        currentRangeSetIndex[i] = currentRangeSetIndex[i] + 1;
        if (currentRangeSetIndex[i] == sortedRangeSetList.get(i).size()) {
          reachedEnd = true;
          break;
        }
      }
    }
  }
  return result;
}
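An illustrative call (values invented) with two sorted range lists, treating each IntPair as an inclusive [start, end] range as the comparisons above imply:

// Requires java.util.Arrays, java.util.List and com.linkedin.pinot.common.utils.Pairs.
List<IntPair> first = Arrays.asList(Pairs.intPair(1, 5), Pairs.intPair(10, 20));
List<IntPair> second = Arrays.asList(Pairs.intPair(3, 12));
List<IntPair> intersection =
    SortedRangeIntersection.intersectSortedRangeSets(Arrays.asList(first, second));
// Walking the algorithm above by hand yields the ranges [3, 5] and [10, 12].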
Use of com.linkedin.pinot.common.utils.Pairs.IntPair in project pinot by linkedin.
The class SortedDocIdSetTest, method testEmpty().
@Test
public void testEmpty() {
  List<IntPair> pairs = new ArrayList<IntPair>();
  SortedDocIdSet sortedDocIdSet = new SortedDocIdSet("Datasource-testCol", pairs);
  BlockDocIdIterator iterator = sortedDocIdSet.iterator();
  List<Integer> result = new ArrayList<Integer>();
  int docId;
  while ((docId = iterator.next()) != Constants.EOF) {
    result.add(docId);
  }
  Assert.assertTrue(result.isEmpty(), "Expected empty result set but got:" + result);
}
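A companion sketch (not part of the project's test class) that exercises the same iteration pattern with a non-empty range list; it assumes, as the code above suggests but does not prove, that each IntPair is an inclusive [start, end] doc-id range:

@Test
public void testSingleRange() {
  // Hypothetical test: one inclusive range [2, 4] is assumed to expand to doc ids 2, 3, 4.
  List<IntPair> pairs = new ArrayList<IntPair>();
  pairs.add(Pairs.intPair(2, 4));
  SortedDocIdSet sortedDocIdSet = new SortedDocIdSet("Datasource-testCol", pairs);
  BlockDocIdIterator iterator = sortedDocIdSet.iterator();
  List<Integer> result = new ArrayList<Integer>();
  int docId;
  while ((docId = iterator.next()) != Constants.EOF) {
    result.add(docId);
  }
  Assert.assertEquals(result, Arrays.asList(2, 3, 4), "Expected doc ids [2, 3, 4] but got:" + result);
}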