Use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.
From the class BestBucketsDeferringCollector, method prepareSelectedBuckets:
/**
 * Replay the wrapped collector, but only on a selection of buckets.
 */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (!finished) {
        throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
        throw new IllegalStateException("Already been replayed");
    }
    final LongHash hash = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (long bucket : selectedBuckets) {
        hash.add(bucket);
    }
    this.selectedBuckets = hash;
    boolean needsScores = collector.needsScores();
    Weight weight = null;
    if (needsScores) {
        weight = searchContext.searcher().createNormalizedWeight(searchContext.query(), true);
    }
    for (Entry entry : entries) {
        final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
        DocIdSetIterator docIt = null;
        if (needsScores && entry.docDeltas.size() > 0) {
            Scorer scorer = weight.scorer(entry.context);
            // We don't need to check if the scorer is null
            // since we are sure that there are documents to replay (entry.docDeltas is not empty).
            docIt = scorer.iterator();
            leafCollector.setScorer(scorer);
        }
        final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
        final PackedLongValues.Iterator buckets = entry.buckets.iterator();
        int doc = 0;
        for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
            doc += docDeltaIterator.next();
            final long bucket = buckets.next();
            final long rebasedBucket = hash.find(bucket);
            if (rebasedBucket != -1) {
                if (needsScores) {
                    if (docIt.docID() < doc) {
                        docIt.advance(doc);
                    }
                    // Aggregations should only be replayed on matching documents.
                    assert docIt.docID() == doc;
                }
                leafCollector.collect(doc, rebasedBucket);
            }
        }
    }
    collector.postCollection();
}
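The deferring collector stores each hit as a delta-encoded doc id plus a bucket ordinal, and the replay loop above decodes the deltas by running accumulation. A minimal round-trip sketch of that encoding, assuming only Lucene's PackedLongValues is on the classpath (the class name DocDeltaRoundTrip is illustrative):

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class DocDeltaRoundTrip {
    public static void main(String[] args) {
        // Record: store the gap between consecutive doc ids rather than the ids
        // themselves, which keeps the packed representation small.
        PackedLongValues.Builder docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
        int lastDoc = 0;
        for (int doc : new int[] { 3, 7, 42 }) {
            docDeltas.add(doc - lastDoc); // stores 3, 4, 35
            lastDoc = doc;
        }
        // Replay: accumulate the deltas to recover the original doc ids in order.
        PackedLongValues.Iterator it = docDeltas.build().iterator();
        int doc = 0;
        while (it.hasNext()) {
            doc += it.next();
            System.out.println(doc); // prints 3, 7, 42
        }
    }
}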
Use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.
From the class ParentToChildrenAggregator, method getLeafCollector:
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
    if (valuesSource == null) {
        return LeafBucketCollector.NO_OP_COLLECTOR;
    }
    final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);
    assert globalOrdinals != null;
    Scorer parentScorer = parentFilter.scorer(ctx);
    final Bits parentDocs = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), parentScorer);
    return new LeafBucketCollector() {
        @Override
        public void collect(int docId, long bucket) throws IOException {
            if (parentDocs.get(docId)) {
                long globalOrdinal = globalOrdinals.getOrd(docId);
                if (globalOrdinal != -1) {
                    if (parentOrdToBuckets.get(globalOrdinal) == -1) {
                        parentOrdToBuckets.set(globalOrdinal, bucket);
                    } else {
                        long[] bucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
                        if (bucketOrds != null) {
                            bucketOrds = Arrays.copyOf(bucketOrds, bucketOrds.length + 1);
                            bucketOrds[bucketOrds.length - 1] = bucket;
                            parentOrdToOtherBuckets.put(globalOrdinal, bucketOrds);
                        } else {
                            parentOrdToOtherBuckets.put(globalOrdinal, new long[] { bucket });
                        }
                        multipleBucketsPerParentOrd = true;
                    }
                }
            }
        }
    };
}
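The collector above keeps the first bucket for each parent ordinal in a dense array and spills any additional buckets into a grow-by-one overflow array. A standalone sketch of that pattern, using plain HashMaps in place of the aggregator's LongArray and LongObjectPagedHashMap fields (all names here are illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class FirstBucketWithOverflow {
    // Parent ordinal -> first bucket seen (the common, single-bucket case).
    private final Map<Long, Long> firstBucket = new HashMap<>();
    // Parent ordinal -> any further buckets, appended one at a time.
    private final Map<Long, long[]> otherBuckets = new HashMap<>();

    public void record(long parentOrd, long bucket) {
        Long first = firstBucket.putIfAbsent(parentOrd, bucket);
        if (first != null) {
            // Grow the overflow array by exactly one slot and append. Cheap in
            // practice because multiple buckets per parent are expected to be rare.
            long[] extra = otherBuckets.get(parentOrd);
            if (extra == null) {
                otherBuckets.put(parentOrd, new long[] { bucket });
            } else {
                extra = Arrays.copyOf(extra, extra.length + 1);
                extra[extra.length - 1] = bucket;
                otherBuckets.put(parentOrd, extra);
            }
        }
    }
}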
Use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.
From the class BinaryRangeAggregatorTests, method doTestSortedBinaryRangeLeafCollector:
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // simulate aggregation
        collector.collect(doc);
        // now do it the naive way
        for (int i = 0; i < ranges.length; ++i) {
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
                        && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
                    expectedCounts[i]++;
                    break;
                }
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
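The naive loop above pins down the range semantics the collector must reproduce: the from bound is inclusive, the to bound is exclusive, and a null bound is unbounded. The same check as a standalone predicate (a sketch; the matches helper is illustrative, not part of BinaryRangeAggregator):

import org.apache.lucene.util.BytesRef;

public class RangeCheck {
    // Lower bound inclusive, upper bound exclusive; a null bound means unbounded.
    static boolean matches(BytesRef term, BytesRef from, BytesRef to) {
        return (from == null || from.compareTo(term) <= 0)
            && (to == null || to.compareTo(term) > 0);
    }

    public static void main(String[] args) {
        BytesRef from = new BytesRef("b");
        BytesRef to = new BytesRef("d");
        System.out.println(matches(new BytesRef("b"), from, to)); // true  (from is inclusive)
        System.out.println(matches(new BytesRef("d"), from, to)); // false (to is exclusive)
    }
}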
Use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.
From the class ParentToChildrenAggregator, method doPostCollection:
@Override
protected void doPostCollection() throws IOException {
    IndexReader indexReader = context().searcher().getIndexReader();
    for (LeafReaderContext ctx : indexReader.leaves()) {
        Scorer childDocsScorer = childFilter.scorer(ctx);
        if (childDocsScorer == null) {
            continue;
        }
        DocIdSetIterator childDocsIter = childDocsScorer.iterator();
        final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
        final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);
        // Set the scorer, since we now replay only the child docIds
        sub.setScorer(new ConstantScoreScorer(null, 1f, childDocsIter));
        final Bits liveDocs = ctx.reader().getLiveDocs();
        for (int docId = childDocsIter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = childDocsIter.nextDoc()) {
            if (liveDocs != null && liveDocs.get(docId) == false) {
                continue;
            }
            long globalOrdinal = globalOrdinals.getOrd(docId);
            if (globalOrdinal != -1) {
                long bucketOrd = parentOrdToBuckets.get(globalOrdinal);
                if (bucketOrd != -1) {
                    collectBucket(sub, docId, bucketOrd);
                    if (multipleBucketsPerParentOrd) {
                        long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
                        if (otherBucketOrds != null) {
                            for (long otherBucketOrd : otherBucketOrds) {
                                collectBucket(sub, docId, otherBucketOrd);
                            }
                        }
                    }
                }
            }
        }
    }
}
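The replay loop follows the standard Lucene iteration idiom: walk a DocIdSetIterator to exhaustion and skip any document the segment's live-docs bitset marks as deleted. Reduced to its essentials (a sketch; forEachLiveDoc is an illustrative helper, not an Elasticsearch API):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

public class LiveDocLoop {
    static void forEachLiveDoc(DocIdSetIterator it, LeafReaderContext ctx) throws IOException {
        // getLiveDocs() returns null when the segment has no deletions.
        final Bits liveDocs = ctx.reader().getLiveDocs();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            if (liveDocs != null && liveDocs.get(doc) == false) {
                continue; // deleted document, skip it
            }
            // ... process doc here ...
        }
    }
}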
Use of org.elasticsearch.search.aggregations.LeafBucketCollector in project elasticsearch by elastic.
From the class BinaryRangeAggregatorTests, method doTestSortedSetRangeLeafCollector:
private void doTestSortedSetRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedSetDocValues values = new FakeSortedSetDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedSetRangeLeafCollector collector = new SortedSetRangeLeafCollector(values, ranges, null) {
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // simulate aggregation
        collector.collect(doc);
        // now do it the naive way
        for (int i = 0; i < ranges.length; ++i) {
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
                        && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
                    expectedCounts[i]++;
                    break;
                }
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
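All five snippets revolve around the same contract: LeafBucketCollector.collect(int doc, long bucket) is invoked once per matching (document, bucket ordinal) pair within a single segment. A minimal custom implementation against the 5.x API shown above, counting hits per bucket (a sketch; the class and method names are illustrative):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.elasticsearch.search.aggregations.LeafBucketCollector;

public class CountingCollectorExample {
    public static LeafBucketCollector countingCollector(Map<Long, Integer> countsPerBucket) {
        return new LeafBucketCollector() {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                // Called once per matching (document, bucket ordinal) pair in a segment.
                countsPerBucket.merge(bucket, 1, Integer::sum);
            }
        };
    }
}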