
Example 1 with BucketPriorityQueue

Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.

In the class LongTermsAggregator, method buildAggregation. The aggregator feeds every shard-local term bucket through a size-bounded BucketPriorityQueue, so only the top shardSize candidates survive the shard-level pass.

@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill in the blanks: with minDocCount == 0, terms that match no documents must still get buckets
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedNumericDocValues values = getValues(valuesSource, ctx);
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    long value = values.valueAt(i);
                    if (longFilter == null || longFilter.accept(value)) {
                        bucketOrds.add(value);
                    }
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long otherDocCount = 0;
    BucketPriorityQueue<LongTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    LongTerms.Bucket spare = null;
    for (long i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new LongTerms.Bucket(0, 0, null, showTermDocCountError, 0, format);
        }
        spare.term = bucketOrds.get(i);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = (LongTerms.Bucket) ordered.insertWithOverflow(spare);
        }
    }
    // Get the top buckets
    final LongTerms.Bucket[] list = new LongTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final LongTerms.Bucket bucket = (LongTerms.Bucket) ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        list[i].aggregations = bucketAggregations(list[i].bucketOrd);
        list[i].docCountError = 0;
    }
    return new LongTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, Arrays.asList(list), 0);
}
Also used: BucketPriorityQueue (org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue), SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues), LeafReaderContext (org.apache.lucene.index.LeafReaderContext)
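
BucketPriorityQueue is a thin wrapper over Lucene's org.apache.lucene.util.PriorityQueue: it holds at most size buckets, and insertWithOverflow returns the evicted (or rejected) element, which the loop above recycles as the next spare to avoid per-bucket allocations. Below is a minimal, self-contained sketch of that pattern; the TopDocCountsDemo class and the long[] {term, docCount} entries are illustrative, not Elasticsearch code:

import org.apache.lucene.util.PriorityQueue;

public class TopDocCountsDemo {
    // Illustrative stand-in for BucketPriorityQueue: entries are {term, docCount}
    // pairs and the queue keeps the `size` entries with the highest docCount.
    static final class TopDocCounts extends PriorityQueue<long[]> {
        TopDocCounts(int size) {
            super(size);
        }

        @Override
        protected boolean lessThan(long[] a, long[] b) {
            return a[1] < b[1]; // the queue's head is the current worst entry
        }
    }

    public static void main(String[] args) {
        // The spare-reuse loop from buildAggregation, reduced to its essentials.
        TopDocCounts ordered = new TopDocCounts(10);
        long[] spare = null;
        for (long ord = 0; ord < 1000; ord++) {
            if (spare == null) {
                spare = new long[] { 0, 0 }; // allocate only when the queue kept the last one
            }
            spare[0] = ord;      // term
            spare[1] = ord % 17; // stand-in doc count
            // null while the queue is still filling; afterwards the evicted or rejected entry
            spare = ordered.insertWithOverflow(spare);
        }
        while (ordered.size() > 0) {
            long[] top = ordered.pop(); // pops worst-first, as in the examples above
            System.out.println("term=" + top[0] + " docCount=" + top[1]);
        }
    }
}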

Example 2 with BucketPriorityQueue

Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.

In the class GlobalOrdinalsStringTermsAggregator, method buildAggregation. Here the queue is filled by iterating global ordinals rather than raw terms; the surviving OrdBucket entries are resolved to their BytesRef terms only after the queue has selected the winners.

@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    if (globalOrds == null) {
        // no context in this reader
        return buildEmptyAggregation();
    }
    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets than maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long otherDocCount = 0;
    BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        otherDocCount += bucketDocCount;
        spare.globalOrd = globalTermOrd;
        spare.bucketOrd = bucketOrd;
        spare.docCount = bucketDocCount;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
            if (spare == null) {
                spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
            }
        }
    }
    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final OrdBucket bucket = (OrdBucket) ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        BytesRef scratch = new BytesRef();
        copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
        list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
        list[i].bucketOrd = bucket.bucketOrd;
        otherDocCount -= list[i].docCount;
    }
    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        StringTerms.Bucket bucket = list[i];
        bucket.aggregations = bucket.docCount == 0 ? bucketEmptyAggregations() : bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }
    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, Arrays.asList(list), 0);
}
Also used: BucketPriorityQueue (org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue), BytesRef (org.apache.lucene.util.BytesRef)
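
For orientation, the thresholds read above (requiredSize, shardSize, minDocCount, shardMinDocCount) come straight from the search request. A sketch of the corresponding request-side parameters, assuming the Java API of the same Elasticsearch era; the names "genres" and "genre" are illustrative:

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;

public class TermsRequestSketch {
    public static SearchSourceBuilder topGenres() {
        TermsAggregationBuilder terms = AggregationBuilders.terms("genres")
            .field("genre")
            .size(10)                     // requiredSize: buckets kept in the final response
            .shardSize(50)                // shardSize: candidate buckets each shard returns
            .minDocCount(1)               // minDocCount: enforced at reduce time
            .shardMinDocCount(0)          // shardMinDocCount: the per-shard filter in the loop above
            .showTermDocCountError(true); // populates docCountError on each bucket
        return new SearchSourceBuilder()
            .size(0) // no hits needed, only the aggregation
            .aggregation(terms);
    }
}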

Example 3 with BucketPriorityQueue

Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.

In the class InternalTerms, method doReduce. This is the coordinating-node side of the terms aggregation: shard-level results are merged, per-shard doc count errors are accumulated, and a BucketPriorityQueue selects the final top buckets.

@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    Map<Object, List<B>> buckets = new HashMap<>();
    long sumDocCountError = 0;
    long otherDocCount = 0;
    InternalTerms<A, B> referenceTerms = null;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked") InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
        if (referenceTerms == null && !aggregation.getClass().equals(UnmappedTerms.class)) {
            referenceTerms = terms;
        }
        if (referenceTerms != null && !referenceTerms.getClass().equals(terms.getClass()) && !terms.getClass().equals(UnmappedTerms.class)) {
            // control gets into this branch when the same field name against which the query is
            // executed is of different types in different indices.
            throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation [" + referenceTerms.getName() + "] because the field you gave in the aggregation query existed as two different " + "types in two different indices");
        }
        otherDocCount += terms.getSumOfOtherDocCounts();
        final long thisAggDocCountError;
        if (terms.getBucketsInternal().size() < getShardSize() || InternalOrder.isTermOrder(order)) {
            thisAggDocCountError = 0;
        } else if (InternalOrder.isCountDesc(this.order)) {
            if (terms.getDocCountError() > 0) {
                // If there is an existing docCountError for this agg then
                // use this as the error for this aggregation
                thisAggDocCountError = terms.getDocCountError();
            } else {
                // otherwise use the doc count of the last term in the
                // aggregation
                thisAggDocCountError = terms.getBucketsInternal().get(terms.getBucketsInternal().size() - 1).docCount;
            }
        } else {
            thisAggDocCountError = -1;
        }
        if (sumDocCountError != -1) {
            if (thisAggDocCountError == -1) {
                sumDocCountError = -1;
            } else {
                sumDocCountError += thisAggDocCountError;
            }
        }
        setDocCountError(thisAggDocCountError);
        for (B bucket : terms.getBucketsInternal()) {
            // If there is already a doc count error for this bucket
            // subtract this aggs doc count error from it to make the
            // new value for the bucket. This then means that when the
            // final error for the bucket is calculated below we account
            // for the existing error calculated in a previous reduce.
            // Note that if the error is unbounded (-1) this will be fixed
            // later in this method.
            bucket.docCountError -= thisAggDocCountError;
            List<B> bucketList = buckets.get(bucket.getKey());
            if (bucketList == null) {
                bucketList = new ArrayList<>();
                buckets.put(bucket.getKey(), bucketList);
            }
            bucketList.add(bucket);
        }
    }
    final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
    final BucketPriorityQueue<B> ordered = new BucketPriorityQueue<>(size, order.comparator(null));
    for (List<B> sameTermBuckets : buckets.values()) {
        final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        if (sumDocCountError == -1) {
            b.docCountError = -1;
        } else {
            b.docCountError += sumDocCountError;
        }
        if (b.docCount >= minDocCount || reduceContext.isFinalReduce() == false) {
            B removed = ordered.insertWithOverflow(b);
            if (removed != null) {
                otherDocCount += removed.getDocCount();
            }
        }
    }
    B[] list = createBucketsArray(ordered.size());
    for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = ordered.pop();
    }
    long docCountError;
    if (sumDocCountError == -1) {
        docCountError = -1;
    } else {
        docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
    }
    return create(name, Arrays.asList(list), docCountError, otherDocCount);
}
Also used: HashMap (java.util.HashMap), InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation), BucketPriorityQueue (org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue), Collections.unmodifiableList (java.util.Collections.unmodifiableList), ArrayList (java.util.ArrayList), List (java.util.List), AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException)
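
The docCountError bookkeeping above implements a simple worst-case bound: a shard that returned a full, count-sorted page may have withheld terms with up to its last reported docCount, so those last counts are summed across shards. A small numeric sketch of the bound; the shard counts are made up:

public class DocCountErrorSketch {
    public static void main(String[] args) {
        // Last (smallest) docCount on each shard's full, count-descending page.
        long[] lastDocCountPerShard = { 10, 7, 4 };

        // A term absent from a shard's page can have at most that shard's last
        // count there, so the worst case across all shards is the sum.
        long sumDocCountError = 0;
        for (long last : lastDocCountPerShard) {
            sumDocCountError += last;
        }
        System.out.println(sumDocCountError); // 21

        // In doReduce, each bucket first subtracts the error of every shard
        // that DID report it (bucket.docCountError -= thisAggDocCountError),
        // then adds sumDocCountError back, leaving only the error contributed
        // by shards that did not report the bucket.
    }
}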

Example 4 with BucketPriorityQueue

Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.

In the class StringTermsAggregator, method buildAggregation. This is the plain (non-global-ordinals) string path; note the BytesRef.deepCopyOf at the end, which detaches the surviving terms from the reused lookup buffer.

@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill in the blanks: with minDocCount == 0, terms that match no documents must still get buckets
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
            // brute force
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    final BytesRef term = values.valueAt(i);
                    if (includeExclude == null || includeExclude.accept(term)) {
                        bucketOrds.add(term);
                    }
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long otherDocCount = 0;
    BucketPriorityQueue<StringTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
        }
    }
    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }
    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) list[i];
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }
    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, Arrays.asList(list), 0);
}
Also used: BucketPriorityQueue (org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), BytesRef (org.apache.lucene.util.BytesRef), SortedBinaryDocValues (org.elasticsearch.index.fielddata.SortedBinaryDocValues)
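
The BytesRef.deepCopyOf call above matters because bucketOrds.get(i, spare.termBytes) fills the bucket with a view over reused bytes. A minimal sketch of the difference between a shallow view and a deep copy, using Lucene's BytesRefBuilder as a stand-in for the reused buffer:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class DeepCopySketch {
    public static void main(String[] args) {
        BytesRefBuilder reused = new BytesRefBuilder();
        reused.copyChars("alpha");

        BytesRef shallow = reused.get();              // view over the builder's internal bytes
        BytesRef deep = BytesRef.deepCopyOf(shallow); // private copy of those bytes

        reused.copyChars("bravo"); // the buffer is overwritten in place

        System.out.println(shallow.utf8ToString()); // "bravo": the view followed the buffer
        System.out.println(deep.utf8ToString());    // "alpha": the copy is detached
    }
}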

Aggregations

BucketPriorityQueue (org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue): 4 usages
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 2 usages
BytesRef (org.apache.lucene.util.BytesRef): 2 usages
ArrayList (java.util.ArrayList): 1 usage
Collections.unmodifiableList (java.util.Collections.unmodifiableList): 1 usage
HashMap (java.util.HashMap): 1 usage
List (java.util.List): 1 usage
SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 1 usage
SortedBinaryDocValues (org.elasticsearch.index.fielddata.SortedBinaryDocValues): 1 usage
AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException): 1 usage
InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation): 1 usage