Search in sources :

Example 26 with InternalAggregation

use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.

the class InternalTerms method doReduce.

/**
 * Merges several terms aggregation results into a single result.
 *
 * <p>Buckets sharing a key are grouped and reduced together, a per-input
 * doc count error is derived and accumulated (with {@code -1} used as the
 * marker for an unbounded error), and the merged buckets are pushed through
 * a bounded priority queue ordered by this aggregation's {@code order}.
 * Doc counts of buckets evicted from the queue are added to the
 * "other doc count" of the result.
 *
 * @param aggregations the results to merge
 * @param reduceContext the context of this reduce phase; only on the final
 *        reduce are {@code requiredSize} and {@code minDocCount} applied
 * @return the merged aggregation built through {@code create}
 * @throws AggregationExecutionException if two mapped inputs are of
 *         different concrete classes
 */
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final Map<Object, List<B>> bucketsByKey = new HashMap<>();
    long totalDocCountError = 0;
    long remainingDocCount = 0;
    InternalTerms<A, B> firstMapped = null;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked") InternalTerms<A, B> shardTerms = (InternalTerms<A, B>) aggregation;
        final boolean unmapped = aggregation.getClass().equals(UnmappedTerms.class);
        // The first input that is not an UnmappedTerms becomes the reference
        // every later mapped input is compared against.
        if (firstMapped == null && unmapped == false) {
            firstMapped = shardTerms;
        }
        // Mapped inputs of differing concrete classes cannot be merged; the
        // message attributes this to the same field having different types
        // in different indices.
        if (firstMapped != null && unmapped == false && firstMapped.getClass().equals(shardTerms.getClass()) == false) {
            throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation [" + firstMapped.getName() + "] because the field you gave in the aggregation query existed as two different " + "types in two different indices");
        }
        remainingDocCount += shardTerms.getSumOfOtherDocCounts();

        final long shardDocCountError;
        if (shardTerms.getBucketsInternal().size() < getShardSize() || InternalOrder.isTermOrder(order)) {
            // Fewer buckets than the shard size, or ordering by term: no error.
            shardDocCountError = 0;
        } else if (InternalOrder.isCountDesc(this.order) == false) {
            // Any other ordering: the error is marked as unbounded.
            shardDocCountError = -1;
        } else if (shardTerms.getDocCountError() > 0) {
            // Count-descending with an error already recorded on this input:
            // carry that error over.
            shardDocCountError = shardTerms.getDocCountError();
        } else {
            // Count-descending without a recorded error: fall back to the doc
            // count of the last bucket of this input.
            final int lastIndex = shardTerms.getBucketsInternal().size() - 1;
            shardDocCountError = shardTerms.getBucketsInternal().get(lastIndex).docCount;
        }

        // Once the running total is unbounded (-1) it stays unbounded.
        if (totalDocCountError != -1) {
            totalDocCountError = shardDocCountError == -1 ? -1 : totalDocCountError + shardDocCountError;
        }
        setDocCountError(shardDocCountError);

        for (B bucket : shardTerms.getBucketsInternal()) {
            // Take this input's error back out of the bucket so that adding
            // the accumulated total further down accounts for an error that
            // was computed in an earlier reduce. An unbounded (-1) error is
            // overwritten further down.
            bucket.docCountError -= shardDocCountError;
            bucketsByKey.computeIfAbsent(bucket.getKey(), key -> new ArrayList<>()).add(bucket);
        }
    }

    final boolean finalReduce = reduceContext.isFinalReduce();
    final int queueSize = finalReduce ? Math.min(requiredSize, bucketsByKey.size()) : bucketsByKey.size();
    final BucketPriorityQueue<B> ordered = new BucketPriorityQueue<>(queueSize, order.comparator(null));
    for (List<B> sameTermBuckets : bucketsByKey.values()) {
        final B merged = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        if (totalDocCountError == -1) {
            merged.docCountError = -1;
        } else {
            merged.docCountError += totalDocCountError;
        }
        // minDocCount is only enforced on the final reduce.
        if (finalReduce == false || merged.docCount >= minDocCount) {
            final B evicted = ordered.insertWithOverflow(merged);
            if (evicted != null) {
                remainingDocCount += evicted.getDocCount();
            }
        }
    }

    // Fill the array from its last slot backwards with the popped buckets.
    final B[] sorted = createBucketsArray(ordered.size());
    int slot = ordered.size();
    while (slot > 0) {
        slot--;
        sorted[slot] = ordered.pop();
    }

    final long reducedDocCountError;
    if (totalDocCountError == -1) {
        reducedDocCountError = -1;
    } else if (aggregations.size() == 1) {
        reducedDocCountError = 0;
    } else {
        reducedDocCountError = totalDocCountError;
    }
    return create(name, Arrays.asList(sorted), reducedDocCountError, remainingDocCount);
}
Also used : HashMap(java.util.HashMap) InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) BucketPriorityQueue(org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue) Collections.unmodifiableList(java.util.Collections.unmodifiableList) ArrayList(java.util.ArrayList) List(java.util.List) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Example 27 with InternalAggregation

use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.

the class InternalSignificantTerms method doReduce.

/**
 * Merges several significant-terms results into one.
 *
 * <p>The subset and superset sizes of all inputs are summed first. Every
 * incoming bucket is then re-created against those global sizes, grouped by
 * its string key, reduced per key, re-scored with the rewritten significance
 * heuristic and offered to a bounded priority queue.
 *
 * @param aggregations the results to merge
 * @param reduceContext the context of this reduce phase; only on the final
 *        reduce are {@code requiredSize}, the positive-score requirement and
 *        {@code minDocCount} applied
 * @return the merged aggregation built through {@code create}
 */
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    // First pass: total up the subset/superset sizes across all inputs.
    long totalSubsetSize = 0;
    long totalSupersetSize = 0;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked") InternalSignificantTerms<A, B> shardTerms = (InternalSignificantTerms<A, B>) aggregation;
        totalSubsetSize += shardTerms.getSubsetSize();
        totalSupersetSize += shardTerms.getSupersetSize();
    }

    // Second pass: group buckets by key, rebuilding each one so that it
    // carries the global sizes computed above instead of its own input's.
    final Map<String, List<B>> bucketsByTerm = new HashMap<>();
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked") InternalSignificantTerms<A, B> shardTerms = (InternalSignificantTerms<A, B>) aggregation;
        for (B shardBucket : shardTerms.getBucketsInternal()) {
            final List<B> group = bucketsByTerm.computeIfAbsent(shardBucket.getKeyAsString(), term -> new ArrayList<>(aggregations.size()));
            group.add(shardBucket.newBucket(shardBucket.getSubsetDf(), totalSubsetSize, shardBucket.getSupersetDf(), totalSupersetSize, shardBucket.aggregations));
        }
    }

    final SignificanceHeuristic heuristic = getSignificanceHeuristic().rewrite(reduceContext);
    final boolean finalReduce = reduceContext.isFinalReduce();
    final int queueSize = finalReduce ? Math.min(requiredSize, bucketsByTerm.size()) : bucketsByTerm.size();
    final BucketSignificancePriorityQueue<B> ordered = new BucketSignificancePriorityQueue<>(queueSize);
    for (List<B> sameTermBuckets : bucketsByTerm.values()) {
        final B merged = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        merged.updateScore(heuristic);
        // Score and minDocCount filtering only happens on the final reduce.
        if (finalReduce == false || (merged.score > 0 && merged.subsetDf >= minDocCount)) {
            ordered.insertWithOverflow(merged);
        }
    }

    // Fill the array from its last slot backwards with the popped buckets.
    final B[] sorted = createBucketsArray(ordered.size());
    int slot = ordered.size();
    while (slot > 0) {
        slot--;
        sorted[slot] = ordered.pop();
    }
    return create(totalSubsetSize, totalSupersetSize, Arrays.asList(sorted));
}
Also used : HashMap(java.util.HashMap) InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) Collections.unmodifiableList(java.util.Collections.unmodifiableList) ArrayList(java.util.ArrayList) List(java.util.List) SignificanceHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic) Map(java.util.Map)

Example 28 with InternalAggregation

use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.

the class InternalHistogram method reduceBuckets.

/**
 * Merges the bucket lists of several histograms into a single list.
 *
 * <p>One iterator per non-empty input is kept in a priority queue keyed on
 * the iterator's current bucket key, so buckets are consumed in ascending
 * key order across all inputs. Consecutive buckets with the same key are
 * buffered and reduced into one bucket when the key changes.
 *
 * @param aggregations the histograms to merge; each one's buckets are
 *        asserted to be in strictly ascending key order
 * @param reduceContext the context of this reduce phase; {@code minDocCount}
 *        is only enforced on the final reduce
 * @return the reduced buckets, in the order their keys were consumed
 */
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> queue = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {

        @Override
        protected boolean lessThan(IteratorAndCurrent left, IteratorAndCurrent right) {
            return left.current.key < right.current.key;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        final InternalHistogram shardHistogram = (InternalHistogram) aggregation;
        if (!shardHistogram.buckets.isEmpty()) {
            queue.add(new IteratorAndCurrent(shardHistogram.buckets.iterator()));
        }
    }

    final List<Bucket> result = new ArrayList<>();
    if (queue.size() == 0) {
        return result;
    }

    final boolean finalReduce = reduceContext.isFinalReduce();
    // Buckets from different inputs that all carry the key being processed.
    final List<Bucket> sameKeyBuckets = new ArrayList<>();
    double activeKey = queue.top().current.key;
    while (queue.size() > 0) {
        final IteratorAndCurrent head = queue.top();
        if (head.current.key != activeKey) {
            // Key changed: flush what has been buffered, then start a new group.
            final Bucket merged = sameKeyBuckets.get(0).reduce(sameKeyBuckets, reduceContext);
            if (finalReduce == false || merged.getDocCount() >= minDocCount) {
                result.add(merged);
            }
            sameKeyBuckets.clear();
            activeKey = head.current.key;
        }
        sameKeyBuckets.add(head.current);

        if (head.iterator.hasNext() == false) {
            // This input is exhausted; drop its iterator from the queue.
            queue.pop();
        } else {
            final Bucket following = head.iterator.next();
            assert following.key > head.current.key : "shards must return data sorted by key";
            head.current = following;
            // The head's key changed, so let the queue re-establish its order.
            queue.updateTop();
        }
    }

    // Flush the group that was still buffered when the queue ran dry.
    if (!sameKeyBuckets.isEmpty()) {
        final Bucket merged = sameKeyBuckets.get(0).reduce(sameKeyBuckets, reduceContext);
        if (finalReduce == false || merged.getDocCount() >= minDocCount) {
            result.add(merged);
        }
    }
    return result;
}
Also used : InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) ArrayList(java.util.ArrayList) PriorityQueue(org.apache.lucene.util.PriorityQueue)

Example 29 with InternalAggregation

use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.

the class InternalRange method doReduce.

/**
 * Merges several range aggregation results into one.
 *
 * <p>Buckets are matched positionally: the i-th bucket of every input is
 * collected into the i-th group, and each group is then reduced into a
 * single bucket. The number of groups is the number of ranges held by this
 * instance.
 *
 * @param aggregations the results to merge
 * @param reduceContext the context of this reduce phase, passed on to each
 *        bucket reduce
 * @return the merged aggregation built through {@code getFactory().create}
 */
@SuppressWarnings("unchecked")
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final int rangeCount = this.ranges.size();

    // One (initially empty) group per range of this instance.
    final List<Bucket>[] bucketsPerRange = new List[rangeCount];
    for (int slot = 0; slot < rangeCount; slot++) {
        bucketsPerRange[slot] = new ArrayList<>();
    }

    // Distribute every input's buckets to the group at the same position.
    for (InternalAggregation aggregation : aggregations) {
        final InternalRange<B, R> shardResult = (InternalRange<B, R>) aggregation;
        int slot = 0;
        for (Bucket shardBucket : shardResult.ranges) {
            bucketsPerRange[slot].add(shardBucket);
            slot++;
        }
    }

    // Collapse each group into one bucket, preserving range order.
    final List<B> reduced = new ArrayList<>();
    for (List<Bucket> sameRangeBuckets : bucketsPerRange) {
        reduced.add((B) sameRangeBuckets.get(0).reduce(sameRangeBuckets, reduceContext));
    }
    return getFactory().create(name, reduced, format, keyed, pipelineAggregators(), getMetaData());
}
Also used : InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) ArrayList(java.util.ArrayList) List(java.util.List)

Example 30 with InternalAggregation

use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.

the class InternalGeoHashGrid method doReduce.

/**
 * Merges several geohash-grid results into one.
 *
 * <p>Buckets are grouped by {@code geohashAsLong} in a paged hash map
 * obtained from the reduce context's {@code bigArrays()}, each group is
 * reduced into a single bucket, and the reduced buckets are pushed through a
 * bounded priority queue. On the final reduce the queue is capped at
 * {@code requiredSize}; otherwise every distinct cell is kept.
 *
 * <p>The paged hash map is closed in a {@code finally} block so that it is
 * released even when reducing a bucket throws.
 *
 * @param aggregations the results to merge; an empty list yields a grid
 *        without buckets
 * @param reduceContext the context of this reduce phase
 * @return a new grid holding the merged buckets
 */
@Override
public InternalGeoHashGrid doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    LongObjectPagedHashMap<List<Bucket>> buckets = null;
    try {
        for (InternalAggregation aggregation : aggregations) {
            InternalGeoHashGrid grid = (InternalGeoHashGrid) aggregation;
            if (buckets == null) {
                // Created lazily so the first input's bucket count can be
                // used as the initial capacity.
                buckets = new LongObjectPagedHashMap<>(grid.buckets.size(), reduceContext.bigArrays());
            }
            for (Bucket bucket : grid.buckets) {
                List<Bucket> existingBuckets = buckets.get(bucket.geohashAsLong);
                if (existingBuckets == null) {
                    existingBuckets = new ArrayList<>(aggregations.size());
                    buckets.put(bucket.geohashAsLong, existingBuckets);
                }
                existingBuckets.add(bucket);
            }
        }
        if (buckets == null) {
            // No input at all: nothing to merge, so return a grid without
            // buckets instead of dereferencing the map that was never created.
            return new InternalGeoHashGrid(getName(), requiredSize, new ArrayList<Bucket>(), pipelineAggregators(), getMetaData());
        }
        final int size = Math.toIntExact(reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size()));
        BucketPriorityQueue ordered = new BucketPriorityQueue(size);
        for (LongObjectPagedHashMap.Cursor<List<Bucket>> cursor : buckets) {
            List<Bucket> sameCellBuckets = cursor.value;
            ordered.insertWithOverflow(sameCellBuckets.get(0).reduce(sameCellBuckets, reduceContext));
        }
        // Fill the array from its last slot backwards with the popped buckets.
        Bucket[] list = new Bucket[ordered.size()];
        for (int i = ordered.size() - 1; i >= 0; i--) {
            list[i] = ordered.pop();
        }
        return new InternalGeoHashGrid(getName(), requiredSize, Arrays.asList(list), pipelineAggregators(), getMetaData());
    } finally {
        // Release the paged map on every exit path, including exceptions.
        if (buckets != null) {
            buckets.close();
        }
    }
}
Also used : LongObjectPagedHashMap(org.elasticsearch.common.util.LongObjectPagedHashMap) GeoPoint(org.elasticsearch.common.geo.GeoPoint) InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) Collections.unmodifiableList(java.util.Collections.unmodifiableList) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation)33 ArrayList (java.util.ArrayList)23 List (java.util.List)16 Map (java.util.Map)10 InternalAggregations (org.elasticsearch.search.aggregations.InternalAggregations)10 IOException (java.io.IOException)8 Collectors (java.util.stream.Collectors)8 PipelineAggregator (org.elasticsearch.search.aggregations.pipeline.PipelineAggregator)8 StreamSupport (java.util.stream.StreamSupport)7 StreamInput (org.elasticsearch.common.io.stream.StreamInput)7 ReduceContext (org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext)7 Bucket (org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket)7 InternalSimpleValue (org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue)6 HashMap (java.util.HashMap)5 DocValueFormat (org.elasticsearch.search.DocValueFormat)5 StreamOutput (org.elasticsearch.common.io.stream.StreamOutput)4 Settings (org.elasticsearch.common.settings.Settings)4 SignificanceHeuristic (org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic)4 GapPolicy (org.elasticsearch.search.aggregations.pipeline.BucketHelpers.GapPolicy)4 BucketHelpers.resolveBucketValue (org.elasticsearch.search.aggregations.pipeline.BucketHelpers.resolveBucketValue)4