Search in sources :

Example 6 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

the class CompositeAggregator method getLeafCollector.

/**
 * Sets up collection for a single segment.
 * <p>
 * One of three things happens:
 * <ul>
 *   <li>a sorted docs producer is available for the leading source: the segment is processed
 *       right here and a {@link CollectionTerminatedException} is thrown;</li>
 *   <li>an after key is set and an index sort prefix applies: the segment is processed through
 *       {@code processLeafFromQuery} and a {@link CollectionTerminatedException} is thrown;</li>
 *   <li>otherwise a collector is returned that forwards every document to the queue's leaf collector.</li>
 * </ul>
 *
 * @param ctx the segment about to be collected
 * @param sub the sub-aggregation collector (not used by this implementation)
 * @return a collector that forwards each document to the queue's leaf collector
 * @throws IOException if reading the segment fails
 * @throws CollectionTerminatedException when the segment has already been fully processed by this call
 */
@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
    finishLeaf();
    final boolean recordDocIds = deferredCollectors != NO_OP_COLLECTOR;
    final Sort sortPrefix = buildIndexSortPrefix(ctx);
    final int prefixLength = computeSortPrefixLen(sortPrefix);

    // A sorted docs producer is only requested when no index sort prefix can be used.
    SortedDocsProducer producer = null;
    if (prefixLength == 0) {
        producer = sources[0].createSortedDocsProducerOrNull(ctx.reader(), context.query());
    }

    if (producer != null) {
        // Walk the documents in the order of the leading source of the composite definition; the producer
        // stops once the leading source value is guaranteed to be greater than the lowest composite
        // bucket in the queue.
        final DocIdSet visitedDocs = producer.processLeaf(context.query(), queue, ctx, recordDocIds);
        if (recordDocIds) {
            entries.add(new Entry(ctx, visitedDocs));
        }
        // The segment has been fully handled by the call above, so the regular search can be skipped.
        // Throwing terminates the execution of the search for this root aggregation,
        // see {@link MultiCollector} for more details on how early termination is handled in aggregations.
        earlyTerminated = true;
        throw new CollectionTerminatedException();
    }

    if (recordDocIds) {
        currentLeaf = ctx;
        docIdSetBuilder = new RoaringDocIdSet.Builder(ctx.reader().maxDoc());
    }

    if (rawAfterKey != null && prefixLength > 0) {
        // An after key is present and the index sort applies: jump straight to the first document
        // located after the index sort prefix (derived from rawAfterKey) and collect from there.
        processLeafFromQuery(ctx, sortPrefix);
        throw new CollectionTerminatedException();
    }

    final LeafBucketCollector queueCollector = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, prefixLength));
    return new LeafBucketCollector() {

        @Override
        public void collect(int doc, long zeroBucket) throws IOException {
            // This collector is only ever expected to be called for bucket 0.
            assert zeroBucket == 0L;
            queueCollector.collect(doc);
        }
    };
}
Also used : LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) CollectionTerminatedException(org.apache.lucene.search.CollectionTerminatedException) Sort(org.apache.lucene.search.Sort) DocIdSet(org.apache.lucene.search.DocIdSet) RoaringDocIdSet(org.apache.lucene.util.RoaringDocIdSet)

Example 7 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

the class BestBucketsDeferringCollector method prepareSelectedBuckets.

/**
 * Replays the recorded documents into the wrapped collector, restricted to the given buckets.
 * <p>
 * Each selected bucket ordinal is rebased to its position in an internal hash; documents whose
 * bucket was not selected are skipped. Once every recorded segment has been replayed,
 * {@code postCollection()} is invoked on the wrapped collector.
 *
 * @param selectedBuckets the bucket ordinals to replay
 * @throws IllegalStateException if collection has not finished yet, or if a replay already happened
 * @throws IOException if replaying a segment fails
 */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (finished == false) {
        throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
        throw new IllegalStateException("Already been replayed");
    }

    this.selectedBuckets = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (int i = 0; i < selectedBuckets.length; i++) {
        this.selectedBuckets.add(selectedBuckets[i]);
    }

    // A weight is only built when the wrapped collector asks for scores.
    final boolean scoresRequired = scoreMode().needsScores();
    Weight weight = null;
    if (scoresRequired) {
        final Query query;
        if (isGlobal) {
            query = new MatchAllDocsQuery();
        } else {
            query = searchContext.query();
        }
        final Query rewritten = searchContext.searcher().rewrite(query);
        weight = searchContext.searcher().createWeight(rewritten, ScoreMode.COMPLETE, 1f);
    }

    for (Entry entry : entries) {
        assert entry.docDeltas.size() > 0 : "segment should have at least one document to replay, got 0";
        try {
            final LeafBucketCollector replayCollector = collector.getLeafCollector(entry.context);
            DocIdSetIterator scoredDocs = null;
            if (scoresRequired) {
                final Scorer scorer = weight.scorer(entry.context);
                // No null check on the scorer: there are documents to replay for this
                // segment (entry.docDeltas is not empty).
                scoredDocs = scorer.iterator();
                replayCollector.setScorer(scorer);
            }

            final PackedLongValues.Iterator deltas = entry.docDeltas.iterator();
            final PackedLongValues.Iterator bucketOrds = entry.buckets.iterator();
            final long recorded = entry.docDeltas.size();
            // Doc ids are stored as deltas, so the current doc id is their running sum.
            int currentDoc = 0;
            for (long n = 0; n < recorded; n++) {
                currentDoc += (int) deltas.next();
                final long originalBucket = bucketOrds.next();
                final long rebased = this.selectedBuckets.find(originalBucket);
                if (rebased == -1) {
                    // this bucket was not selected for replay
                    continue;
                }
                if (scoresRequired) {
                    if (scoredDocs.docID() < currentDoc) {
                        scoredDocs.advance(currentDoc);
                    }
                    // aggregations should only be replayed on matching documents
                    assert scoredDocs.docID() == currentDoc;
                }
                replayCollector.collect(currentDoc, rebased);
            }
        } catch (CollectionTerminatedException e) {
            // collection was terminated prematurely for this leaf;
            // carry on with the following one
        }
    }
    collector.postCollection();
}
Also used : PackedLongValues(org.apache.lucene.util.packed.PackedLongValues) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LongHash(org.opensearch.common.util.LongHash) Scorer(org.apache.lucene.search.Scorer) Weight(org.apache.lucene.search.Weight) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) CollectionTerminatedException(org.apache.lucene.search.CollectionTerminatedException) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 8 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

the class BucketsAggregatorTests method buildMergeAggregator.

/**
 * Builds a minimal {@link BucketsAggregator} named "test" with no sub-aggregations, backed by a
 * one-document index holding a single "numeric" doc value.
 * <p>
 * The aggregator's own collection and build methods are stubs. Note that the directory and the
 * reader are opened in try-with-resources blocks, so both are closed by the time the aggregator
 * is handed back to the caller.
 *
 * @return the stub aggregator
 * @throws IOException if the temporary index cannot be written or read
 */
public BucketsAggregator buildMergeAggregator() throws IOException {
    try (Directory dir = newDirectory()) {
        // Index exactly one document, then close the writer before opening a reader.
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("numeric", 0));
            writer.addDocument(doc);
        }
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            MultiBucketConsumerService.MultiBucketConsumer bucketConsumer = new MultiBucketConsumerService.MultiBucketConsumer(
                DEFAULT_MAX_BUCKETS,
                new NoneCircuitBreakerService().getBreaker(CircuitBreaker.REQUEST)
            );
            NumberFieldMapper.NumberFieldType integerFieldType = new NumberFieldMapper.NumberFieldType(
                "test",
                NumberFieldMapper.NumberType.INTEGER
            );
            SearchContext context = createSearchContext(searcher, createIndexSettings(), null, bucketConsumer, integerFieldType);
            return new BucketsAggregator("test", AggregatorFactories.EMPTY, context, null, null, null) {

                // No per-segment collector is provided by this stub.
                @Override
                protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
                    return null;
                }

                // Always yields an empty result array, whatever ordinals are passed in.
                @Override
                public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
                    return new InternalAggregation[0];
                }

                // No empty aggregation is provided by this stub.
                @Override
                public InternalAggregation buildEmptyAggregation() {
                    return null;
                }
            };
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumberFieldMapper(org.opensearch.index.mapper.NumberFieldMapper) SearchContext(org.opensearch.search.internal.SearchContext) Document(org.apache.lucene.document.Document) InternalAggregation(org.opensearch.search.aggregations.InternalAggregation) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) MultiBucketConsumerService(org.opensearch.search.aggregations.MultiBucketConsumerService) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) NoneCircuitBreakerService(org.opensearch.indices.breaker.NoneCircuitBreakerService)

Example 9 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

the class FilterAggregatorTests method testBucketComparator.

/**
 * Checks the bucket comparators of a filter aggregator: after collecting two documents into
 * bucket 0 and one into bucket 1, both the default (null key) and the "doc_count" comparators
 * must order bucket 0 after bucket 1 in ascending order, and an unknown key must be rejected
 * with an {@link IllegalArgumentException}.
 */
public void testBucketComparator() throws IOException {
    try (Directory dir = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            writer.addDocument(singleton(new Field("field", "1", KeywordFieldMapper.Defaults.FIELD_TYPE)));
        }
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = newSearcher(reader, true, true);
            FilterAggregationBuilder filterBuilder = new FilterAggregationBuilder("test", new MatchAllQueryBuilder());
            FilterAggregator agg = createAggregator(filterBuilder, searcher, fieldType);
            agg.preCollection();

            // Bucket 0 receives two documents, bucket 1 receives one.
            LeafBucketCollector leafCollector = agg.getLeafCollector(reader.leaves().get(0));
            leafCollector.collect(0, 0);
            leafCollector.collect(0, 0);
            leafCollector.collect(0, 1);

            // Comparator obtained without a key.
            BucketComparator byDefault = agg.bucketComparator(null, SortOrder.ASC);
            assertThat(byDefault.compare(0, 1), greaterThan(0));
            assertThat(byDefault.compare(1, 0), lessThan(0));

            // Comparator obtained with the explicit "doc_count" key.
            BucketComparator byDocCount = agg.bucketComparator("doc_count", SortOrder.ASC);
            assertThat(byDocCount.compare(0, 1), greaterThan(0));
            assertThat(byDocCount.compare(1, 0), lessThan(0));

            // Any other key is refused, whatever the sort order.
            String expectedMessage = "Ordering on a single-bucket aggregation can only be done on its doc_count. "
                + "Either drop the key (a la \"test\") or change it to \"doc_count\" (a la \"test.doc_count\") or \"key\".";
            Exception e = expectThrows(
                IllegalArgumentException.class,
                () -> agg.bucketComparator("garbage", randomFrom(SortOrder.values()))
            );
            assertThat(e.getMessage(), equalTo(expectedMessage));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Field(org.apache.lucene.document.Field) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) IndexReader(org.apache.lucene.index.IndexReader) BucketComparator(org.opensearch.search.aggregations.Aggregator.BucketComparator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IOException(java.io.IOException) Directory(org.apache.lucene.store.Directory) MatchAllQueryBuilder(org.opensearch.index.query.MatchAllQueryBuilder)

Example 10 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

the class BinaryRangeAggregatorTests method doTestSortedSetRangeLeafCollector.

/**
 * Randomized check of {@code SortedSetRangeLeafCollector}: for a handful of simulated documents,
 * the per-range counts produced by the collector must equal counts computed by comparing every
 * document term against every range directly.
 *
 * @param maxNumValuesPerDoc upper bound on the number of ordinals given to a simulated document;
 *                           also the lower bound on the number of distinct terms generated
 */
private void doTestSortedSetRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    // Sorted dictionary of distinct random terms; a term's ordinal is its index in this array.
    final Set<BytesRef> uniqueTerms = new HashSet<>();
    final int termCount = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (uniqueTerms.size() < termCount) {
        uniqueTerms.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = uniqueTerms.toArray(new BytesRef[0]);
    Arrays.sort(terms);

    // Random ranges; each bound is independently left open (null) or set to a random term.
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int r = 0; r < numRanges; ++r) {
        final BytesRef from = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        final BytesRef to = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        ranges[r] = new BinaryRangeAggregator.Range(Integer.toString(r), from, to);
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);

    FakeSortedSetDocValues values = new FakeSortedSetDocValues(terms);
    final int[] counts = new int[numRanges];
    SortedSetRangeLeafCollector collector = new SortedSetRangeLeafCollector(values, ranges, null) {

        // Tally the bucket directly instead of delegating to a sub collector.
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket] += 1;
        }
    };

    final int[] expectedCounts = new int[numRanges];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        // Pick a random set of distinct ordinals for this document.
        LongHashSet docOrds = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (docOrds.size() < numValues) {
            docOrds.add(random().nextInt(terms.length));
        }
        final long[] ords = docOrds.toArray();
        Arrays.sort(ords);
        values.ords = ords;

        // simulate aggregation
        collector.collect(doc);

        // reference computation: a range is counted once per document as soon as one term falls inside it
        for (int r = 0; r < numRanges; ++r) {
            final BytesRef lower = ranges[r].from;
            final BytesRef upper = ranges[r].to;
            boolean matched = false;
            for (int o = 0; o < ords.length && matched == false; ++o) {
                final BytesRef term = terms[(int) ords[o]];
                // lower bound is inclusive, upper bound is exclusive; a null bound is unbounded
                final boolean atOrAboveLower = lower == null || lower.compareTo(term) <= 0;
                final boolean belowUpper = upper == null || upper.compareTo(term) > 0;
                matched = atOrAboveLower && belowUpper;
            }
            if (matched) {
                expectedCounts[r]++;
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
Also used : SortedSetRangeLeafCollector(org.opensearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedSetRangeLeafCollector) LongHashSet(com.carrotsearch.hppc.LongHashSet) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Aggregations

LeafBucketCollector (org.opensearch.search.aggregations.LeafBucketCollector): 23; IndexReader (org.apache.lucene.index.IndexReader): 8; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 8; Directory (org.apache.lucene.store.Directory): 8; IOException (java.io.IOException): 7; Document (org.apache.lucene.document.Document): 7; IndexSearcher (org.apache.lucene.search.IndexSearcher): 7; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 6; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 6; Query (org.apache.lucene.search.Query): 6; DirectoryReader (org.apache.lucene.index.DirectoryReader): 5; IndexWriter (org.apache.lucene.index.IndexWriter): 5; Bits (org.apache.lucene.util.Bits): 5; ArrayList (java.util.ArrayList): 4; List (java.util.List): 4; SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 4; DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 4; ScoreMode (org.apache.lucene.search.ScoreMode): 4; Scorer (org.apache.lucene.search.Scorer): 4; Weight (org.apache.lucene.search.Weight): 4