Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
The class CompositeAggregator, method getLeafCollector:
@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
    finishLeaf();
    boolean fillDocIdSet = deferredCollectors != NO_OP_COLLECTOR;
    Sort indexSortPrefix = buildIndexSortPrefix(ctx);
    int sortPrefixLen = computeSortPrefixLen(indexSortPrefix);
    SortedDocsProducer sortedDocsProducer = sortPrefixLen == 0
        ? sources[0].createSortedDocsProducerOrNull(ctx.reader(), context.query())
        : null;
    if (sortedDocsProducer != null) {
        // Visit documents sorted by the leading source of the composite definition and terminate
        // when the leading source value is guaranteed to be greater than the lowest composite bucket
        // in the queue.
        DocIdSet docIdSet = sortedDocsProducer.processLeaf(context.query(), queue, ctx, fillDocIdSet);
        if (fillDocIdSet) {
            entries.add(new Entry(ctx, docIdSet));
        }
        // We can bypass search entirely for this segment, the processing is done in the previous call.
        // Throwing this exception will terminate the execution of the search for this root aggregation,
        // see {@link MultiCollector} for more details on how we handle early termination in aggregations.
        earlyTerminated = true;
        throw new CollectionTerminatedException();
    } else {
        if (fillDocIdSet) {
            currentLeaf = ctx;
            docIdSetBuilder = new RoaringDocIdSet.Builder(ctx.reader().maxDoc());
        }
        if (rawAfterKey != null && sortPrefixLen > 0) {
            // We have an after key and index sort is applicable, so we jump directly to the doc
            // that is after the index sort prefix using the rawAfterKey and start collecting
            // documents from there.
            processLeafFromQuery(ctx, indexSortPrefix);
            throw new CollectionTerminatedException();
        } else {
            final LeafBucketCollector inner = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, sortPrefixLen));
            return new LeafBucketCollector() {
                @Override
                public void collect(int doc, long zeroBucket) throws IOException {
                    assert zeroBucket == 0L;
                    inner.collect(doc);
                }
            };
        }
    }
}
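Both CollectionTerminatedException throws above rely on the caller treating that exception as "done with this segment". The following is a minimal sketch, not OpenSearch code, of such a per-leaf driver loop; it is a simplified stand-in for what the search loop does, using only the Lucene Collector and LeafCollector interfaces.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;

final class LeafLoopSketch {
    static void collectAllLeaves(List<LeafReaderContext> leaves, Collector collector) throws IOException {
        for (LeafReaderContext ctx : leaves) {
            try {
                LeafCollector leafCollector = collector.getLeafCollector(ctx);
                // ... iterate the matching documents of this segment and call
                // leafCollector.collect(doc) for each one ...
            } catch (CollectionTerminatedException e) {
                // the collector needs no more documents from this segment;
                // continue with the next leaf
            }
        }
    }
}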
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
The class BestBucketsDeferringCollector, method prepareSelectedBuckets:
/**
 * Replay the wrapped collector, but only on a selection of buckets.
 */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (finished == false) {
        throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
        throw new IllegalStateException("Already been replayed");
    }
    this.selectedBuckets = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (long ord : selectedBuckets) {
        this.selectedBuckets.add(ord);
    }
    boolean needsScores = scoreMode().needsScores();
    Weight weight = null;
    if (needsScores) {
        Query query = isGlobal ? new MatchAllDocsQuery() : searchContext.query();
        weight = searchContext.searcher().createWeight(searchContext.searcher().rewrite(query), ScoreMode.COMPLETE, 1f);
    }
    for (Entry entry : entries) {
        assert entry.docDeltas.size() > 0 : "segment should have at least one document to replay, got 0";
        try {
            final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
            DocIdSetIterator scoreIt = null;
            if (needsScores) {
                Scorer scorer = weight.scorer(entry.context);
                // We don't need to check if the scorer is null
                // since we are sure that there are documents to replay (entry.docDeltas is not empty).
                scoreIt = scorer.iterator();
                leafCollector.setScorer(scorer);
            }
            final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
            final PackedLongValues.Iterator buckets = entry.buckets.iterator();
            int doc = 0;
            for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
                doc += (int) docDeltaIterator.next();
                final long bucket = buckets.next();
                final long rebasedBucket = this.selectedBuckets.find(bucket);
                if (rebasedBucket != -1) {
                    if (needsScores) {
                        if (scoreIt.docID() < doc) {
                            scoreIt.advance(doc);
                        }
                        // aggregations should only be replayed on matching documents
                        assert scoreIt.docID() == doc;
                    }
                    leafCollector.collect(doc, rebasedBucket);
                }
            }
        } catch (CollectionTerminatedException e) {
            // collection was terminated prematurely
            // continue with the following leaf
        }
    }
    collector.postCollection();
}
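The replay loop decodes delta-encoded doc ids and a parallel stream of bucket ordinals from PackedLongValues. The recording side is not shown in this excerpt; the following is a hedged sketch of how those values could be captured during the first pass. The class and field names (RecordingSketch, docDeltasBuilder, bucketsBuilder, lastDocId) are assumptions for illustration, not the actual BestBucketsDeferringCollector fields.

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

final class RecordingSketch {
    private final PackedLongValues.Builder docDeltasBuilder = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    private final PackedLongValues.Builder bucketsBuilder = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    private int lastDocId = 0;

    void record(int doc, long bucket) {
        // store the gap to the previously collected doc so the replay loop can rebuild
        // absolute doc ids with "doc += docDeltaIterator.next()"
        docDeltasBuilder.add(doc - lastDocId);
        bucketsBuilder.add(bucket);
        lastDocId = doc;
    }

    PackedLongValues docDeltas() {
        return docDeltasBuilder.build();
    }

    PackedLongValues buckets() {
        return bucketsBuilder.build();
    }
}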
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
The class BucketsAggregatorTests, method buildMergeAggregator:
public BucketsAggregator buildMergeAggregator() throws IOException {
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            Document document = new Document();
            document.add(new SortedNumericDocValuesField("numeric", 0));
            indexWriter.addDocument(document);
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            SearchContext searchContext = createSearchContext(
                indexSearcher,
                createIndexSettings(),
                null,
                new MultiBucketConsumerService.MultiBucketConsumer(
                    DEFAULT_MAX_BUCKETS,
                    new NoneCircuitBreakerService().getBreaker(CircuitBreaker.REQUEST)
                ),
                new NumberFieldMapper.NumberFieldType("test", NumberFieldMapper.NumberType.INTEGER)
            );
            return new BucketsAggregator("test", AggregatorFactories.EMPTY, searchContext, null, null, null) {
                @Override
                protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
                    return null;
                }

                @Override
                public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
                    return new InternalAggregation[0];
                }

                @Override
                public InternalAggregation buildEmptyAggregation() {
                    return null;
                }
            };
        }
    }
}
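The anonymous subclass above stubs getLeafCollector with null because the test only needs the aggregator's bucket-merging machinery. If a test also needed to collect documents into buckets, the stub could be filled in along the lines of the hedged sketch below, which delegates to BucketsAggregator's collectBucket helper; this is only an illustrative variation, not part of the test shown above.

@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
    return new LeafBucketCollector() {
        @Override
        public void collect(int doc, long owningBucketOrd) throws IOException {
            // count this document into its bucket ordinal and forward to sub-collectors
            collectBucket(sub, doc, owningBucketOrd);
        }
    };
}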
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
The class FilterAggregatorTests, method testBucketComparator:
public void testBucketComparator() throws IOException {
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            indexWriter.addDocument(singleton(new Field("field", "1", KeywordFieldMapper.Defaults.FIELD_TYPE)));
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
            FilterAggregationBuilder builder = new FilterAggregationBuilder("test", new MatchAllQueryBuilder());
            FilterAggregator agg = createAggregator(builder, indexSearcher, fieldType);
            agg.preCollection();
            LeafBucketCollector collector = agg.getLeafCollector(indexReader.leaves().get(0));
            collector.collect(0, 0);
            collector.collect(0, 0);
            collector.collect(0, 1);
            BucketComparator c = agg.bucketComparator(null, SortOrder.ASC);
            assertThat(c.compare(0, 1), greaterThan(0));
            assertThat(c.compare(1, 0), lessThan(0));
            c = agg.bucketComparator("doc_count", SortOrder.ASC);
            assertThat(c.compare(0, 1), greaterThan(0));
            assertThat(c.compare(1, 0), lessThan(0));
            Exception e = expectThrows(
                IllegalArgumentException.class,
                () -> agg.bucketComparator("garbage", randomFrom(SortOrder.values()))
            );
            assertThat(
                e.getMessage(),
                equalTo(
                    "Ordering on a single-bucket aggregation can only be done on its doc_count. "
                        + "Either drop the key (a la \"test\") or change it to \"doc_count\" (a la \"test.doc_count\") or \"key\"."
                )
            );
        }
    }
}
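The BucketComparator returned by bucketComparator compares two bucket ordinals by their doc_count. As a hedged illustration, the fragment below shows how it could be used to order ordinals; it would continue inside the try block of the test above, needs java.util.ArrayList and java.util.List, and is not part of the original test.

// order bucket ordinals by doc_count using the comparator from the test above;
// the Comparator wrapping is illustrative, not OpenSearch API
List<Long> ords = new ArrayList<>(List.of(0L, 1L));
BucketComparator byDocCount = agg.bucketComparator("doc_count", SortOrder.ASC);
ords.sort((lhs, rhs) -> byDocCount.compare(lhs, rhs));
// with the collects above (two docs in ordinal 0, one in ordinal 1) and ascending order,
// ordinal 1 now sorts before ordinal 0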
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
The class BinaryRangeAggregatorTests, method doTestSortedSetRangeLeafCollector:
private void doTestSortedSetRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        ranges[i] = new BinaryRangeAggregator.Range(
            Integer.toString(i),
            randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
            randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)))
        );
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedSetDocValues values = new FakeSortedSetDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedSetRangeLeafCollector collector = new SortedSetRangeLeafCollector(values, ranges, null) {
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // simulate aggregation
        collector.collect(doc);
        // now do it the naive way
        for (int i = 0; i < ranges.length; ++i) {
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
                    && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
                    expectedCounts[i]++;
                    break;
                }
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
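The helper is parameterized by maxNumValuesPerDoc so the same check can cover single-valued and multi-valued documents. A hedged sketch of entry points that call it is shown below; the method names and the multi-valued bound are assumptions for illustration, not necessarily the ones in BinaryRangeAggregatorTests.

// exercise the single-valued and multi-valued paths of SortedSetRangeLeafCollector
public void testSortedSetRangeLeafCollectorSingleValued() throws Exception {
    doTestSortedSetRangeLeafCollector(1);
}

public void testSortedSetRangeLeafCollectorMultiValued() throws Exception {
    doTestSortedSetRangeLeafCollector(randomIntBetween(2, 8));
}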