Search in sources :

Example 6 with SignificanceHeuristic

Use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in the Elasticsearch project by Elastic.

The doReduce method of the InternalSignificantTerms class.

/**
 * Combines several significant-terms results into one.
 *
 * <p>The reduction runs in three steps:
 * <ol>
 *   <li>sum the subset and superset sizes of every incoming aggregation;</li>
 *   <li>group the incoming buckets by their string key, re-creating each bucket
 *       against the summed sizes;</li>
 *   <li>reduce each group to a single bucket, score it with the rewritten
 *       significance heuristic and collect the results in a bounded priority queue.</li>
 * </ol>
 *
 * <p>On a non-final reduce every merged bucket is kept. On the final reduce only
 * buckets with a positive score and a subset document frequency of at least
 * {@code minDocCount} are kept, and the queue is capped at {@code requiredSize}.
 *
 * @param aggregations  the aggregations to merge; each is cast to {@code InternalSignificantTerms<A, B>}
 * @param reduceContext the reduce context, consulted for the final-reduce flag and passed on to
 *                      the heuristic rewrite and the per-bucket reduce
 * @return a new aggregation built from the summed sizes and the surviving buckets
 */
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    // Pass 1: the global totals must be complete before any bucket is re-created below.
    long globalSubsetSize = 0;
    long globalSupersetSize = 0;
    for (InternalAggregation shardResult : aggregations) {
        @SuppressWarnings("unchecked") InternalSignificantTerms<A, B> shardTerms = (InternalSignificantTerms<A, B>) shardResult;
        globalSubsetSize += shardTerms.getSubsetSize();
        globalSupersetSize += shardTerms.getSupersetSize();
    }

    // Pass 2: group buckets sharing a key, re-based on the global totals so that
    // their statistics refer to the combined pots rather than a single source.
    Map<String, List<B>> bucketsByKey = new HashMap<>();
    for (InternalAggregation shardResult : aggregations) {
        @SuppressWarnings("unchecked") InternalSignificantTerms<A, B> shardTerms = (InternalSignificantTerms<A, B>) shardResult;
        for (B shardBucket : shardTerms.getBucketsInternal()) {
            B rebased = shardBucket.newBucket(
                shardBucket.getSubsetDf(),
                globalSubsetSize,
                shardBucket.getSupersetDf(),
                globalSupersetSize,
                shardBucket.aggregations
            );
            bucketsByKey
                .computeIfAbsent(shardBucket.getKeyAsString(), key -> new ArrayList<>(aggregations.size()))
                .add(rebased);
        }
    }

    SignificanceHeuristic heuristic = getSignificanceHeuristic().rewrite(reduceContext);
    final boolean finalReduce = reduceContext.isFinalReduce();

    // Only the final reduce truncates to requiredSize; intermediate reduces keep every key.
    final int size;
    if (finalReduce) {
        size = Math.min(requiredSize, bucketsByKey.size());
    } else {
        size = bucketsByKey.size();
    }

    BucketSignificancePriorityQueue<B> ordered = new BucketSignificancePriorityQueue<>(size);
    for (List<B> sameTermBuckets : bucketsByKey.values()) {
        final B merged = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        merged.updateScore(heuristic);
        final boolean passesThresholds = merged.score > 0 && merged.subsetDf >= minDocCount;
        if (passesThresholds || !finalReduce) {
            ordered.insertWithOverflow(merged);
        }
    }

    // Fill the array from the back while popping from the queue.
    final int kept = ordered.size();
    B[] list = createBucketsArray(kept);
    int slot = kept;
    while (slot > 0) {
        slot--;
        list[slot] = ordered.pop();
    }
    return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list));
}
Also used : HashMap(java.util.HashMap) InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) Collections.unmodifiableList(java.util.Collections.unmodifiableList) ArrayList(java.util.ArrayList) List(java.util.List) SignificanceHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SignificanceHeuristic (org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic)6 MutualInformation (org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation)4 ArrayList (java.util.ArrayList)3 ChiSquare (org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare)3 GND (org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND)3 JLHScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore)3 List (java.util.List)2 BytesRef (org.apache.lucene.util.BytesRef)2 NamedWriteableRegistry (org.elasticsearch.common.io.stream.NamedWriteableRegistry)2 PercentageScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Collections.emptyList (java.util.Collections.emptyList)1 Collections.emptyMap (java.util.Collections.emptyMap)1 Collections.singletonList (java.util.Collections.singletonList)1 Collections.unmodifiableList (java.util.Collections.unmodifiableList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1