Search in sources :

Example 16 with SortedBinaryDocValues

use of org.elasticsearch.index.fielddata.SortedBinaryDocValues in project elasticsearch by elastic.

The class StringTermsAggregator, method getLeafCollector.

/**
 * Creates the per-leaf collector that routes each collected document into one bucket per
 * accepted term value.
 *
 * <p>For every document, the values returned by {@code valuesSource.bytesValues(ctx)} are
 * filtered through {@code includeExclude} (when it is non-null), consecutive duplicates are
 * dropped, and each remaining term is added to {@code bucketOrds}; the document is then
 * collected into the new or already existing bucket.
 *
 * @param ctx the leaf reader context whose values are read
 * @param sub the sub-collector handed to {@code collectBucket} / {@code collectExistingBucket}
 * @return a collector bound to the values of {@code ctx}
 * @throws IOException declared by the overridden method
 */
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
    final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
    return new LeafBucketCollectorBase(sub, values) {

        // Holds a copy of the last term collected for the current document (used for de-duplication).
        final BytesRefBuilder previous = new BytesRefBuilder();

        @Override
        public void collect(int doc, long bucket) throws IOException {
            assert bucket == 0;
            values.setDocument(doc);
            final int valuesCount = values.count();
            // SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups
            previous.clear();
            // A cleared builder is indistinguishable from a zero-length term, so track explicitly
            // whether `previous` holds a term that was really collected for this document.
            // Without this, the first empty term of a document would be skipped as a "duplicate".
            boolean hasPrevious = false;
            for (int i = 0; i < valuesCount; ++i) {
                final BytesRef bytes = values.valueAt(i);
                if (includeExclude != null && !includeExclude.accept(bytes)) {
                    continue;
                }
                if (hasPrevious && previous.get().equals(bytes)) {
                    continue;
                }
                long bucketOrdinal = bucketOrds.add(bytes);
                if (bucketOrdinal < 0) {
                    // already seen: add() encodes an existing ordinal as (-1 - ordinal)
                    bucketOrdinal = -1 - bucketOrdinal;
                    collectExistingBucket(sub, doc, bucketOrdinal);
                } else {
                    collectBucket(sub, doc, bucketOrdinal);
                }
                previous.copyBytes(bytes);
                hasPrevious = true;
            }
        }
    };
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LeafBucketCollectorBase(org.elasticsearch.search.aggregations.LeafBucketCollectorBase) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Example 17 with SortedBinaryDocValues

use of org.elasticsearch.index.fielddata.SortedBinaryDocValues in project elasticsearch by elastic.

The class StringTermsAggregator, method buildAggregation.

/**
 * Builds the {@code StringTerms} result for the single top-level owning bucket.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>When the minimum doc count is zero and either the order is not {@code COUNT_DESC} or
 *       fewer buckets than the required size exist, every accepted term of every leaf is
 *       added to {@code bucketOrds} so that zero-count terms get a bucket too.</li>
 *   <li>All buckets are offered to a bounded priority queue; only those reaching the shard
 *       minimum doc count are inserted.</li>
 *   <li>The queue is drained back-to-front into an array, deferred collections are replayed
 *       for the surviving ordinals, and the sub-aggregations are attached.</li>
 * </ol>
 *
 * @param owningBucketOrdinal ordinal of the owning bucket; asserted to be {@code 0}
 * @return the terms aggregation holding the surviving buckets and the doc count of all others
 * @throws IOException declared by the overridden method
 */
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    final boolean wantsEmptyBuckets = bucketCountThresholds.getMinDocCount() == 0;
    if (wantsEmptyBuckets && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // Fill in the blanks: brute-force scan of every doc in every leaf to register its terms.
        for (LeafReaderContext leaf : context.searcher().getTopReaderContext().leaves()) {
            final SortedBinaryDocValues leafValues = valuesSource.bytesValues(leaf);
            final int maxDoc = leaf.reader().maxDoc();
            for (int doc = 0; doc < maxDoc; doc++) {
                leafValues.setDocument(doc);
                final int termCount = leafValues.count();
                for (int t = 0; t < termCount; t++) {
                    final BytesRef candidate = leafValues.valueAt(t);
                    if (includeExclude != null && !includeExclude.accept(candidate)) {
                        continue;
                    }
                    bucketOrds.add(candidate);
                }
            }
        }
    }

    final int queueSize = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    final BucketPriorityQueue<StringTerms.Bucket> topBuckets = new BucketPriorityQueue<>(queueSize, order.comparator(this));

    // Starts as the sum over all buckets; the surviving buckets are subtracted again below.
    long otherDocCount = 0;
    // Scratch bucket that is recycled whenever the queue hands one back (or never took it).
    StringTerms.Bucket reusable = null;
    for (int ord = 0; ord < bucketOrds.size(); ord++) {
        if (reusable == null) {
            reusable = new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format);
        }
        bucketOrds.get(ord, reusable.termBytes);
        reusable.docCount = bucketDocCount(ord);
        reusable.bucketOrd = ord;
        otherDocCount += reusable.docCount;
        if (reusable.docCount >= bucketCountThresholds.getShardMinDocCount()) {
            reusable = topBuckets.insertWithOverflow(reusable);
        }
    }

    // Drain the queue; pop() order is written from the last slot towards the first.
    final int survivorCount = topBuckets.size();
    final StringTerms.Bucket[] list = new StringTerms.Bucket[survivorCount];
    final long[] survivingBucketOrds = new long[survivorCount];
    for (int slot = survivorCount - 1; slot >= 0; slot--) {
        final StringTerms.Bucket top = (StringTerms.Bucket) topBuckets.pop();
        list[slot] = top;
        survivingBucketOrds[slot] = top.bucketOrd;
        otherDocCount -= top.docCount;
    }

    // Replay any deferred collections for the buckets that made it.
    runDeferredCollections(survivingBucketOrds);

    // Finalise each surviving bucket: give it its own copy of the term bytes and attach sub-aggs.
    for (StringTerms.Bucket survivor : list) {
        survivor.termBytes = BytesRef.deepCopyOf(survivor.termBytes);
        survivor.aggregations = bucketAggregations(survivor.bucketOrd);
        survivor.docCountError = 0;
    }

    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, Arrays.asList(list), 0);
}
Also used : BucketPriorityQueue(org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Aggregations

SortedBinaryDocValues (org.elasticsearch.index.fielddata.SortedBinaryDocValues)17 BytesRef (org.apache.lucene.util.BytesRef)13 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)3 IndexService (org.elasticsearch.index.IndexService)3 Searcher (org.elasticsearch.index.engine.Engine.Searcher)3 QueryShardContext (org.elasticsearch.index.query.QueryShardContext)3 IOException (java.io.IOException)2 SortedDocValues (org.apache.lucene.index.SortedDocValues)2 Scorer (org.apache.lucene.search.Scorer)2 FixedBitSet (org.apache.lucene.util.FixedBitSet)2 LeafBucketCollectorBase (org.elasticsearch.search.aggregations.LeafBucketCollectorBase)2 RandomAccessOrds (org.apache.lucene.index.RandomAccessOrds)1 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)1 Explanation (org.apache.lucene.search.Explanation)1 SortField (org.apache.lucene.search.SortField)1 ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)1 BitSet (org.apache.lucene.util.BitSet)1 BitSetIterator (org.apache.lucene.util.BitSetIterator)1