Search in sources :

Example 1 with LeafBucketCollector

use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.

the class BestBucketsDeferringCollector method prepareSelectedBuckets.

/**
 * Replays the recorded documents into the wrapped collector, restricted to the
 * given buckets.
 *
 * <p>The selected bucket ordinals are stored in a {@link LongHash}; each recorded
 * (doc, bucket) pair whose bucket is found in that hash is forwarded to the wrapped
 * collector under the ordinal returned by the hash lookup. Pairs whose bucket is
 * not found are skipped. When the wrapped collector needs scores, a scorer is
 * created per segment and advanced to each replayed document first.
 *
 * @param selectedBuckets the bucket ordinals to replay
 * @throws IllegalStateException if collection has not finished yet, or if a replay
 *                               has already been performed
 * @throws IOException           declared for the collector and scorer calls made here
 */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (finished == false) {
        throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
        throw new IllegalStateException("Already been replayed");
    }

    // Register every requested bucket; lookups below go through this hash.
    final LongHash selected = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (int i = 0; i < selectedBuckets.length; i++) {
        selected.add(selectedBuckets[i]);
    }
    this.selectedBuckets = selected;

    // A Weight is only built when the wrapped collector asks for scores.
    final boolean scoresRequired = collector.needsScores();
    final Weight weight = scoresRequired
        ? searchContext.searcher().createNormalizedWeight(searchContext.query(), true)
        : null;

    for (Entry entry : entries) {
        final LeafBucketCollector leaf = collector.getLeafCollector(entry.context);
        final long recorded = entry.docDeltas.size();

        DocIdSetIterator matching = null;
        if (scoresRequired && recorded > 0) {
            // No null check on the scorer: entry.docDeltas is not empty, so there
            // are documents to replay in this segment.
            final Scorer scorer = weight.scorer(entry.context);
            matching = scorer.iterator();
            leaf.setScorer(scorer);
        }

        final PackedLongValues.Iterator deltas = entry.docDeltas.iterator();
        final PackedLongValues.Iterator bucketOrds = entry.buckets.iterator();
        int doc = 0;
        long remaining = recorded;
        while (remaining-- > 0) {
            // Doc ids are stored as deltas from the previous doc id.
            doc += deltas.next();
            final long rebased = selected.find(bucketOrds.next());
            if (rebased == -1) {
                // Bucket was not selected: nothing to replay for this pair.
                continue;
            }
            if (scoresRequired) {
                if (matching.docID() < doc) {
                    matching.advance(doc);
                }
                // aggregations should only be replayed on matching documents
                assert matching.docID() == doc;
            }
            leaf.collect(doc, rebased);
        }
    }

    collector.postCollection();
}
Also used : PackedLongValues(org.apache.lucene.util.packed.PackedLongValues) LongHash(org.elasticsearch.common.util.LongHash) LeafBucketCollector(org.elasticsearch.search.aggregations.LeafBucketCollector) Scorer(org.apache.lucene.search.Scorer) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Weight(org.apache.lucene.search.Weight)

Example 2 with LeafBucketCollector

use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.

the class ParentToChildrenAggregator method getLeafCollector.

/**
 * Builds the per-segment collector that records which bucket(s) each parent
 * document was collected into, keyed by the parent's global ordinal.
 *
 * <p>Returns {@link LeafBucketCollector#NO_OP_COLLECTOR} when there is no values
 * source. The {@code sub} collector is not used by the collector returned here.
 *
 * @param ctx the segment being collected
 * @param sub the sub-aggregation leaf collector
 * @return a collector that fills {@code parentOrdToBuckets} and, for ordinals seen
 *         more than once, {@code parentOrdToOtherBuckets}
 * @throws IOException declared for the scorer and doc-values lookups made here
 */
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
    if (valuesSource == null) {
        return LeafBucketCollector.NO_OP_COLLECTOR;
    }

    final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);
    assert globalOrdinals != null;

    final Scorer parentScorer = parentFilter.scorer(ctx);
    final int maxDoc = ctx.reader().maxDoc();
    final Bits parentDocs = Lucene.asSequentialAccessBits(maxDoc, parentScorer);

    return new LeafBucketCollector() {

        @Override
        public void collect(int docId, long bucket) throws IOException {
            // Only documents matched by the parent filter are of interest.
            if (parentDocs.get(docId) == false) {
                return;
            }
            final long globalOrdinal = globalOrdinals.getOrd(docId);
            if (globalOrdinal == -1) {
                return;
            }

            // First bucket seen for this ordinal goes into the primary mapping.
            if (parentOrdToBuckets.get(globalOrdinal) == -1) {
                parentOrdToBuckets.set(globalOrdinal, bucket);
                return;
            }

            // Any further bucket is appended to the overflow mapping.
            final long[] previous = parentOrdToOtherBuckets.get(globalOrdinal);
            final long[] extended;
            if (previous == null) {
                extended = new long[] { bucket };
            } else {
                extended = Arrays.copyOf(previous, previous.length + 1);
                extended[previous.length] = bucket;
            }
            parentOrdToOtherBuckets.put(globalOrdinal, extended);
            multipleBucketsPerParentOrd = true;
        }
    };
}
Also used : LeafBucketCollector(org.elasticsearch.search.aggregations.LeafBucketCollector) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) Scorer(org.apache.lucene.search.Scorer) Bits(org.apache.lucene.util.Bits) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 3 with LeafBucketCollector

use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.

the class BinaryRangeAggregatorTests method doTestSortedBinaryRangeLeafCollector.

/**
 * Randomized check of {@code SortedBinaryRangeLeafCollector}: feeds random per-document
 * ordinal sets through the collector and compares the per-range counts it reports
 * with counts computed by a brute-force scan over every range and value.
 *
 * @param maxNumValuesPerDoc upper bound (inclusive) on the number of values per document;
 *                           also the lower bound on the number of distinct terms generated
 */
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    // Build a sorted dictionary of distinct random terms.
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        final String randomTerm = TestUtil.randomSimpleString(random(), randomInt(2));
        termSet.add(new BytesRef(randomTerm));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);

    // Random ranges; either bound may be open (null).
    final int numRanges = randomIntBetween(1, 10);
    final BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int r = 0; r < numRanges; r++) {
        final String key = Integer.toString(r);
        final BytesRef from = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        final BytesRef to = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        ranges[r] = new BinaryRangeAggregator.Range(key, from, to);
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);

    final FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
    final int[] counts = new int[ranges.length];
    final SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {

        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            // Tally how often each range (bucket) is hit.
            counts[(int) bucket]++;
        }
    };

    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    int doc = 0;
    while (doc < maxDoc) {
        // Pick a random set of distinct ordinals for this document.
        final LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;

        // simulate aggregation
        collector.collect(doc);

        // now do it the naive way: a range counts once per doc if any value falls in it
        for (int r = 0; r < ranges.length; r++) {
            final BinaryRangeAggregator.Range range = ranges[r];
            for (long ord : ords) {
                final BytesRef term = terms[(int) ord];
                if (range.from != null && range.from.compareTo(term) > 0) {
                    continue;
                }
                if (range.to != null && range.to.compareTo(term) <= 0) {
                    continue;
                }
                expectedCounts[r]++;
                break;
            }
        }
        doc++;
    }

    assertArrayEquals(expectedCounts, counts);
}
Also used : SortedBinaryRangeLeafCollector(org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedBinaryRangeLeafCollector) LongHashSet(com.carrotsearch.hppc.LongHashSet) LeafBucketCollector(org.elasticsearch.search.aggregations.LeafBucketCollector) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Example 4 with LeafBucketCollector

use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.

the class ParentToChildrenAggregator method doPostCollection.

/**
 * Replays child documents into the sub-aggregators once collection is over.
 *
 * <p>For every segment, walks the documents matched by the child filter, looks up
 * each document's global ordinal, and collects the document into the bucket
 * recorded for that ordinal in {@code parentOrdToBuckets}, plus any additional
 * buckets in {@code parentOrdToOtherBuckets}. Segments without a child scorer,
 * deleted documents, documents without an ordinal, and ordinals with no recorded
 * bucket are skipped.
 *
 * @throws IOException declared for the scorer, doc-values and collector calls made here
 */
@Override
protected void doPostCollection() throws IOException {
    final IndexReader indexReader = context().searcher().getIndexReader();
    for (LeafReaderContext ctx : indexReader.leaves()) {
        final Scorer childDocsScorer = childFilter.scorer(ctx);
        if (childDocsScorer == null) {
            continue;
        }
        final DocIdSetIterator childDocsIter = childDocsScorer.iterator();
        final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
        final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);

        // Set the scorer, since we now replay only the child docIds
        sub.setScorer(new ConstantScoreScorer(null, 1f, childDocsIter));

        final Bits liveDocs = ctx.reader().getLiveDocs();
        int docId;
        while ((docId = childDocsIter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // Skip documents that are not live (a null liveDocs applies no filtering).
            if (liveDocs != null && liveDocs.get(docId) == false) {
                continue;
            }
            final long globalOrdinal = globalOrdinals.getOrd(docId);
            if (globalOrdinal == -1) {
                continue;
            }
            final long bucketOrd = parentOrdToBuckets.get(globalOrdinal);
            if (bucketOrd == -1) {
                continue;
            }
            collectBucket(sub, docId, bucketOrd);

            // Additional buckets only exist if some ordinal was collected more than once.
            if (multipleBucketsPerParentOrd == false) {
                continue;
            }
            final long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
            if (otherBucketOrds == null) {
                continue;
            }
            for (int i = 0; i < otherBucketOrds.length; i++) {
                collectBucket(sub, docId, otherBucketOrds[i]);
            }
        }
    }
}
Also used : LeafBucketCollector(org.elasticsearch.search.aggregations.LeafBucketCollector) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Scorer(org.apache.lucene.search.Scorer) Bits(org.apache.lucene.util.Bits) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 5 with LeafBucketCollector

use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.

the class BinaryRangeAggregatorTests method doTestSortedSetRangeLeafCollector.

/**
 * Randomized check of {@code SortedSetRangeLeafCollector}: feeds random per-document
 * ordinal sets through the collector and compares the per-range counts it reports
 * with counts computed by a brute-force scan over every range and value.
 *
 * @param maxNumValuesPerDoc upper bound (inclusive) on the number of values per document;
 *                           also the lower bound on the number of distinct terms generated
 */
private void doTestSortedSetRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    // Build a sorted dictionary of distinct random terms.
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        final String randomTerm = TestUtil.randomSimpleString(random(), randomInt(2));
        termSet.add(new BytesRef(randomTerm));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);

    // Random ranges; either bound may be open (null).
    final int numRanges = randomIntBetween(1, 10);
    final BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int r = 0; r < numRanges; r++) {
        final String key = Integer.toString(r);
        final BytesRef from = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        final BytesRef to = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        ranges[r] = new BinaryRangeAggregator.Range(key, from, to);
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);

    final FakeSortedSetDocValues values = new FakeSortedSetDocValues(terms);
    final int[] counts = new int[ranges.length];
    final SortedSetRangeLeafCollector collector = new SortedSetRangeLeafCollector(values, ranges, null) {

        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            // Tally how often each range (bucket) is hit.
            counts[(int) bucket]++;
        }
    };

    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    int doc = 0;
    while (doc < maxDoc) {
        // Pick a random set of distinct ordinals for this document.
        final LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;

        // simulate aggregation
        collector.collect(doc);

        // now do it the naive way: a range counts once per doc if any value falls in it
        for (int r = 0; r < ranges.length; r++) {
            final BinaryRangeAggregator.Range range = ranges[r];
            for (long ord : ords) {
                final BytesRef term = terms[(int) ord];
                if (range.from != null && range.from.compareTo(term) > 0) {
                    continue;
                }
                if (range.to != null && range.to.compareTo(term) <= 0) {
                    continue;
                }
                expectedCounts[r]++;
                break;
            }
        }
        doc++;
    }

    assertArrayEquals(expectedCounts, counts);
}
Also used : SortedSetRangeLeafCollector(org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedSetRangeLeafCollector) LongHashSet(com.carrotsearch.hppc.LongHashSet) LeafBucketCollector(org.elasticsearch.search.aggregations.LeafBucketCollector) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Aggregations

LeafBucketCollector (org.elasticsearch.search.aggregations.LeafBucketCollector): 5, Scorer (org.apache.lucene.search.Scorer): 3, LongHashSet (com.carrotsearch.hppc.LongHashSet): 2, HashSet (java.util.HashSet): 2, SortedDocValues (org.apache.lucene.index.SortedDocValues): 2, ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer): 2, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 2, Bits (org.apache.lucene.util.Bits): 2, BytesRef (org.apache.lucene.util.BytesRef): 2, IndexReader (org.apache.lucene.index.IndexReader): 1, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1, Weight (org.apache.lucene.search.Weight): 1, PackedLongValues (org.apache.lucene.util.packed.PackedLongValues): 1, LongHash (org.elasticsearch.common.util.LongHash): 1, SortedBinaryRangeLeafCollector (org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedBinaryRangeLeafCollector): 1, SortedSetRangeLeafCollector (org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedSetRangeLeafCollector): 1