Search in sources :

Example 21 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

The following example shows the method testBucketMergeAndDelete from the class MergingBucketsDeferringCollectorTests.

public void testBucketMergeAndDelete() throws Exception {
    testCase((deferring, downstream) -> new LeafBucketCollector() {

        @Override
        public void collect(int doc, long owningBucketOrd) throws IOException {
            // This collector is only ever driven at the root level.
            assert owningBucketOrd == 0;
            // Use the doc id itself as the bucket ordinal so each doc lands in its own bucket.
            downstream.collect(doc, doc);
            if (doc == 7) {
                // After the eighth doc, remap buckets: ordinals > 3 merge into bucket 0;
                // -1 appears to drop the bucket entirely (hence "AndDelete") — matches the test name.
                deferring.mergeBuckets(previousOrd -> previousOrd > 3 ? 0 : -1);
            }
        }
    }, (deferring, resultCollector) -> {
        // Replay only buckets 0, 8 and 9 into the final collector and verify the
        // post-merge layout: merged docs 4-7 under ordinal 0, then 8 and 9 renumbered.
        deferring.prepareSelectedBuckets(0, 8, 9);
        assertThat(
            resultCollector.collection,
            equalTo(
                org.opensearch.common.collect.Map.of(
                    0L,
                    org.opensearch.common.collect.List.of(4, 5, 6, 7),
                    1L,
                    org.opensearch.common.collect.List.of(8),
                    2L,
                    org.opensearch.common.collect.List.of(9)
                )
            )
        );
    });
}
Also used : Query(org.apache.lucene.search.Query) CheckedBiConsumer(org.opensearch.common.CheckedBiConsumer) SearchContext(org.opensearch.search.internal.SearchContext) BiFunction(java.util.function.BiFunction) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) BucketCollector(org.opensearch.search.aggregations.BucketCollector) Directory(org.apache.lucene.store.Directory) Map(java.util.Map) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) AggregatorTestCase(org.opensearch.search.aggregations.AggregatorTestCase) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Mockito.when(org.mockito.Mockito.when) ScoreMode(org.apache.lucene.search.ScoreMode) IndexWriter(org.apache.lucene.index.IndexWriter) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) IOException(java.io.IOException)

Example 22 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

The following example shows the method testRandomCase from the class CompositeValuesCollectorQueueTests.

/**
 * Randomized end-to-end exercise of {@code CompositeValuesCollectorQueue}: indexes random
 * documents for the given source types, then pages through the queue (optionally via the
 * sorted-docs producer) and asserts every expected composite key is visited in sorted order.
 *
 * @param forceMerge            force-merge to one segment; also switches keyword fields to the
 *                              global-ordinals source, since ordinals are global with a single segment
 * @param missingBucket         whether docs missing a field still produce an (explicit null) bucket
 * @param indexSortSourcePrefix number of leading sources that are also index-sort fields (0 = no sort)
 * @param types                 field type / value class pairs backing each composite source
 */
private void testRandomCase(boolean forceMerge, boolean missingBucket, int indexSortSourcePrefix, ClassAndName... types) throws IOException {
    final BigArrays bigArrays = BigArrays.NON_RECYCLING_INSTANCE;
    int numDocs = randomIntBetween(50, 100);
    List<Comparable<?>[]> possibleValues = new ArrayList<>();
    SortField[] indexSortFields = indexSortSourcePrefix == 0 ? null : new SortField[indexSortSourcePrefix];
    // Draw a random pool of candidate values per source; leading sources also become index-sort fields.
    for (int i = 0; i < types.length; i++) {
        ClassAndName type = types[i];
        final Comparable<?>[] values;
        int numValues = randomIntBetween(1, numDocs * 2);
        values = new Comparable[numValues];
        if (type.clazz == Long.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedNumericSortField(type.fieldType.name(), SortField.Type.LONG);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = randomLong();
            }
        } else if (type.clazz == Double.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedNumericSortField(type.fieldType.name(), SortField.Type.DOUBLE);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = randomDouble();
            }
        } else if (type.clazz == BytesRef.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedSetSortField(type.fieldType.name(), false);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = new BytesRef(randomAlphaOfLengthBetween(5, 50));
            }
        } else {
            assert (false);
        }
        possibleValues.add(values);
    }
    Set<CompositeKey> keys = new HashSet<>();
    try (Directory directory = newDirectory()) {
        final IndexWriterConfig writerConfig = newIndexWriterConfig();
        if (indexSortFields != null) {
            writerConfig.setIndexSort(new Sort(indexSortFields));
        }
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, new KeywordAnalyzer())) {
            for (int i = 0; i < numDocs; i++) {
                Document document = new Document();
                List<List<Comparable<?>>> docValues = new ArrayList<>();
                boolean hasAllField = true;
                for (int j = 0; j < types.length; j++) {
                    // Index-sort fields must be single-valued; other fields may be multi-valued or absent.
                    int numValues = indexSortSourcePrefix - 1 >= j ? 1 : randomIntBetween(0, 5);
                    List<Comparable<?>> values = new ArrayList<>();
                    if (numValues == 0) {
                        hasAllField = false;
                        if (missingBucket) {
                            values.add(null);
                        }
                    } else {
                        for (int k = 0; k < numValues; k++) {
                            values.add(possibleValues.get(j)[randomIntBetween(0, possibleValues.get(j).length - 1)]);
                            if (types[j].clazz == Long.class) {
                                long value = (Long) values.get(k);
                                document.add(new SortedNumericDocValuesField(types[j].fieldType.name(), value));
                                document.add(new LongPoint(types[j].fieldType.name(), value));
                            } else if (types[j].clazz == Double.class) {
                                document.add(new SortedNumericDocValuesField(types[j].fieldType.name(), NumericUtils.doubleToSortableLong((Double) values.get(k))));
                            } else if (types[j].clazz == BytesRef.class) {
                                BytesRef value = (BytesRef) values.get(k);
                                document.add(new SortedSetDocValuesField(types[j].fieldType.name(), (BytesRef) values.get(k)));
                                document.add(new TextField(types[j].fieldType.name(), value.utf8ToString(), Field.Store.NO));
                            } else {
                                assert (false);
                            }
                        }
                    }
                    docValues.add(values);
                }
                // A doc contributes composite keys only if it has every field, or missing buckets are enabled.
                if (hasAllField || missingBucket) {
                    List<CompositeKey> comb = createListCombinations(docValues);
                    keys.addAll(comb);
                }
                indexWriter.addDocument(document);
            }
            if (forceMerge) {
                indexWriter.forceMerge(1);
            }
        }
        IndexReader reader = DirectoryReader.open(directory);
        // Page size for the composite queue; at least 1 even when only one key exists.
        int size = keys.size() > 1 ? randomIntBetween(1, keys.size()) : 1;
        SingleDimensionValuesSource<?>[] sources = new SingleDimensionValuesSource[types.length];
        for (int i = 0; i < types.length; i++) {
            final MappedFieldType fieldType = types[i].fieldType;
            if (types[i].clazz == Long.class) {
                sources[i] = new LongValuesSource(bigArrays, fieldType, context -> DocValues.getSortedNumeric(context.reader(), fieldType.name()), value -> value, DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1);
            } else if (types[i].clazz == Double.class) {
                sources[i] = new DoubleValuesSource(bigArrays, fieldType, context -> FieldData.sortableLongBitsToDoubles(DocValues.getSortedNumeric(context.reader(), fieldType.name())), DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1);
            } else if (types[i].clazz == BytesRef.class) {
                if (forceMerge) {
                    // we don't create global ordinals but we test this mode when the reader has a single segment
                    // since ordinals are global in this case.
                    sources[i] = new GlobalOrdinalValuesSource(bigArrays, fieldType, context -> DocValues.getSortedSet(context.reader(), fieldType.name()), DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1);
                } else {
                    sources[i] = new BinaryValuesSource(bigArrays, (b) -> {
                    }, fieldType, context -> FieldData.toString(DocValues.getSortedSet(context.reader(), fieldType.name())), DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1);
                }
            } else {
                assert (false);
            }
        }
        CompositeKey[] expected = keys.toArray(new CompositeKey[0]);
        Arrays.sort(expected, (a, b) -> compareKey(a, b));
        for (boolean withProducer : new boolean[] { true, false }) {
            // Page through all expected keys, `size` at a time, using the last key of the
            // previous page as the "after" cursor.
            int pos = 0;
            CompositeKey last = null;
            while (pos < size) {
                final CompositeValuesCollectorQueue queue = new CompositeValuesCollectorQueue(BigArrays.NON_RECYCLING_INSTANCE, sources, size, last);
                final SortedDocsProducer docsProducer = sources[0].createSortedDocsProducerOrNull(reader, new MatchAllDocsQuery());
                for (LeafReaderContext leafReaderContext : reader.leaves()) {
                    if (docsProducer != null && withProducer) {
                        assertEquals(DocIdSet.EMPTY, docsProducer.processLeaf(new MatchAllDocsQuery(), queue, leafReaderContext, false));
                    } else {
                        final LeafBucketCollector leafCollector = new LeafBucketCollector() {

                            @Override
                            public void collect(int doc, long bucket) throws IOException {
                                queue.addIfCompetitive(indexSortSourcePrefix);
                            }
                        };
                        final LeafBucketCollector queueCollector = queue.getLeafCollector(leafReaderContext, leafCollector);
                        final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
                        for (int i = 0; i < leafReaderContext.reader().maxDoc(); i++) {
                            if (liveDocs == null || liveDocs.get(i)) {
                                try {
                                    queueCollector.collect(i);
                                } catch (CollectionTerminatedException exc) {
                                    // Early termination is only legal when the index sort matches the source prefix.
                                    assertThat(indexSortSourcePrefix, greaterThan(0));
                                }
                            }
                        }
                    }
                }
                assertEquals(size, Math.min(queue.size(), expected.length - pos));
                // The queue pops keys in descending order, so walk `expected` backwards from the
                // end of this page.
                int ptr = pos + (queue.size() - 1);
                pos += queue.size();
                last = null;
                // BUGFIX: was `queue.size() > pos`, which is always false here because `pos` was
                // just incremented by `queue.size()` — the verification loop never ran and the
                // key assertions were dead. Drain the queue until empty instead.
                while (queue.size() > 0) {
                    CompositeKey key = queue.toCompositeKey(queue.pop());
                    if (last == null) {
                        // First popped key is the largest of the page: the "after" cursor for the next page.
                        last = key;
                    }
                    assertThat(key, equalTo(expected[ptr--]));
                }
            }
        }
        reader.close();
    }
}
Also used : Arrays(java.util.Arrays) LongPoint(org.apache.lucene.document.LongPoint) DocValueFormat(org.opensearch.search.DocValueFormat) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericUtils(org.apache.lucene.util.NumericUtils) LONG(org.opensearch.index.mapper.NumberFieldMapper.NumberType.LONG) NumberFieldMapper(org.opensearch.index.mapper.NumberFieldMapper) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) ArrayList(java.util.ArrayList) DocIdSet(org.apache.lucene.search.DocIdSet) HashSet(java.util.HashSet) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) DOUBLE(org.opensearch.index.mapper.NumberFieldMapper.NumberType.DOUBLE) MissingOrder(org.opensearch.search.aggregations.bucket.missing.MissingOrder) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SortField(org.apache.lucene.search.SortField) Bits(org.apache.lucene.util.Bits) KeywordFieldMapper(org.opensearch.index.mapper.KeywordFieldMapper) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) AggregatorTestCase(org.opensearch.search.aggregations.AggregatorTestCase) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) DirectoryReader(org.apache.lucene.index.DirectoryReader) Set(java.util.Set) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) CollectionTerminatedException(org.apache.lucene.search.CollectionTerminatedException) List(java.util.List) DocValues(org.apache.lucene.index.DocValues) Field(org.apache.lucene.document.Field) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TextField(org.apache.lucene.document.TextField) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) 
RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BigArrays(org.opensearch.common.util.BigArrays) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) IndexReader(org.apache.lucene.index.IndexReader) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) FieldData(org.opensearch.index.fielddata.FieldData) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) Sort(org.apache.lucene.search.Sort) TextField(org.apache.lucene.document.TextField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) BigArrays(org.opensearch.common.util.BigArrays) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Bits(org.apache.lucene.util.Bits) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) CollectionTerminatedException(org.apache.lucene.search.CollectionTerminatedException) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) LongPoint(org.apache.lucene.document.LongPoint) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LongPoint(org.apache.lucene.document.LongPoint) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 23 with LeafBucketCollector

use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.

The following example shows the method doTestSortedBinaryRangeLeafCollector from the class BinaryRangeAggregatorTests.

/**
 * Cross-checks {@code SortedBinaryRangeLeafCollector} against a brute-force recount:
 * random terms and random [from, to) ranges are generated, random docs are collected,
 * and the per-range counts must match a naive scan that credits each matching range
 * at most once per document.
 */
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    // Build a pool of unique random terms, at least maxNumValuesPerDoc of them.
    final Set<BytesRef> uniqueTerms = new HashSet<>();
    final int termCount = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (uniqueTerms.size() < termCount) {
        uniqueTerms.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = uniqueTerms.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    // Random ranges; a null bound means open-ended on that side.
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        BytesRef from = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        BytesRef to = randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)));
        ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i), from, to);
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {

        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            // Tally the hit directly instead of delegating to a sub-collector.
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int docCount = randomIntBetween(5, 10);
    for (int docId = 0; docId < docCount; ++docId) {
        // Pick a random, sorted set of distinct term ordinals for this document.
        LongHashSet pickedOrds = new LongHashSet();
        final int valueCount = randomInt(maxNumValuesPerDoc);
        while (pickedOrds.size() < valueCount) {
            pickedOrds.add(random().nextInt(terms.length));
        }
        final long[] ords = pickedOrds.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // Aggregate through the collector under test.
        collector.collect(docId);
        // Recompute naively: a range counts at most once per doc, whichever value matched.
        for (int r = 0; r < ranges.length; ++r) {
            boolean matched = false;
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                boolean fromOk = ranges[r].from == null || ranges[r].from.compareTo(term) <= 0;
                boolean toOk = ranges[r].to == null || ranges[r].to.compareTo(term) > 0;
                if (fromOk && toOk) {
                    matched = true;
                    break;
                }
            }
            if (matched) {
                expectedCounts[r]++;
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
Also used : SortedBinaryRangeLeafCollector(org.opensearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedBinaryRangeLeafCollector) LongHashSet(com.carrotsearch.hppc.LongHashSet) LeafBucketCollector(org.opensearch.search.aggregations.LeafBucketCollector) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet) LongHashSet(com.carrotsearch.hppc.LongHashSet)

Aggregations

LeafBucketCollector (org.opensearch.search.aggregations.LeafBucketCollector)23 IndexReader (org.apache.lucene.index.IndexReader)8 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)8 Directory (org.apache.lucene.store.Directory)8 IOException (java.io.IOException)7 Document (org.apache.lucene.document.Document)7 IndexSearcher (org.apache.lucene.search.IndexSearcher)7 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 Query (org.apache.lucene.search.Query)6 DirectoryReader (org.apache.lucene.index.DirectoryReader)5 IndexWriter (org.apache.lucene.index.IndexWriter)5 Bits (org.apache.lucene.util.Bits)5 ArrayList (java.util.ArrayList)4 List (java.util.List)4 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)4 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)4 ScoreMode (org.apache.lucene.search.ScoreMode)4 Scorer (org.apache.lucene.search.Scorer)4 Weight (org.apache.lucene.search.Weight)4