Use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.
The class InternalTerms, method doReduce:
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    Map<Object, List<B>> buckets = new HashMap<>();
    long sumDocCountError = 0;
    long otherDocCount = 0;
    InternalTerms<A, B> referenceTerms = null;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
        if (referenceTerms == null && !aggregation.getClass().equals(UnmappedTerms.class)) {
            referenceTerms = terms;
        }
        if (referenceTerms != null
                && !referenceTerms.getClass().equals(terms.getClass())
                && !terms.getClass().equals(UnmappedTerms.class)) {
            // the aggregated field is of different types in different indices
            throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation ["
                    + referenceTerms.getName() + "] because the field you gave in the aggregation query existed as two different "
                    + "types in two different indices");
        }
        otherDocCount += terms.getSumOfOtherDocCounts();
        final long thisAggDocCountError;
        if (terms.getBucketsInternal().size() < getShardSize() || InternalOrder.isTermOrder(order)) {
            thisAggDocCountError = 0;
        } else if (InternalOrder.isCountDesc(this.order)) {
            if (terms.getDocCountError() > 0) {
                // If there is an existing docCountError for this agg then
                // use this as the error for this aggregation
                thisAggDocCountError = terms.getDocCountError();
            } else {
                // otherwise use the doc count of the last term in the
                // aggregation
                thisAggDocCountError = terms.getBucketsInternal().get(terms.getBucketsInternal().size() - 1).docCount;
            }
        } else {
            thisAggDocCountError = -1;
        }
        if (sumDocCountError != -1) {
            if (thisAggDocCountError == -1) {
                sumDocCountError = -1;
            } else {
                sumDocCountError += thisAggDocCountError;
            }
        }
        setDocCountError(thisAggDocCountError);
        for (B bucket : terms.getBucketsInternal()) {
            // If there is already a doc count error for this bucket
            // subtract this agg's doc count error from it to make the
            // new value for the bucket. This then means that when the
            // final error for the bucket is calculated below we account
            // for the existing error calculated in a previous reduce.
            // Note that if the error is unbounded (-1) this will be fixed
            // later in this method.
            bucket.docCountError -= thisAggDocCountError;
            List<B> bucketList = buckets.get(bucket.getKey());
            if (bucketList == null) {
                bucketList = new ArrayList<>();
                buckets.put(bucket.getKey(), bucketList);
            }
            bucketList.add(bucket);
        }
    }
    final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
    final BucketPriorityQueue<B> ordered = new BucketPriorityQueue<>(size, order.comparator(null));
    for (List<B> sameTermBuckets : buckets.values()) {
        final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        if (sumDocCountError == -1) {
            b.docCountError = -1;
        } else {
            b.docCountError += sumDocCountError;
        }
        if (b.docCount >= minDocCount || reduceContext.isFinalReduce() == false) {
            B removed = ordered.insertWithOverflow(b);
            if (removed != null) {
                otherDocCount += removed.getDocCount();
            }
        }
    }
    B[] list = createBucketsArray(ordered.size());
    for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = ordered.pop();
    }
    long docCountError;
    if (sumDocCountError == -1) {
        docCountError = -1;
    } else {
        docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
    }
    return create(name, Arrays.asList(list), docCountError, otherDocCount);
}
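The error bookkeeping above is the subtle part of the terms reduce: a shard that returned a full page of shard_size buckets may have cut off terms with up to as many documents as its last returned bucket. The standalone sketch below isolates that estimate for the count-descending case; ShardResult and shardError are hypothetical names invented for illustration, not Elasticsearch API.

// A minimal, self-contained sketch of the per-shard doc-count-error estimate.
import java.util.List;

public class DocCountErrorSketch {

    // A stand-in for one shard's terms result: bucket doc counts sorted
    // descending, plus the shard_size that was requested from the shard.
    record ShardResult(List<Long> docCountsDesc, int shardSize) {}

    // Mirrors the logic above for count-descending order: if a shard returned
    // fewer buckets than shard_size, no term was cut off and the error is 0;
    // otherwise the doc count of the last (smallest) returned term bounds how
    // many occurrences of an unseen term that shard could have omitted.
    static long shardError(ShardResult shard) {
        if (shard.docCountsDesc().size() < shard.shardSize()) {
            return 0;
        }
        return shard.docCountsDesc().get(shard.docCountsDesc().size() - 1);
    }

    public static void main(String[] args) {
        ShardResult a = new ShardResult(List.of(100L, 40L, 7L), 3); // full page: error 7
        ShardResult b = new ShardResult(List.of(90L, 2L), 3);       // short page: error 0
        long sumError = 0;
        for (ShardResult shard : List.of(a, b)) {
            sumError += shardError(shard);
        }
        // A term missing from shard 'a' could still have up to 7 docs there,
        // so any merged doc count is accurate to within sumError = 7.
        System.out.println("upper bound on doc count error: " + sumError);
    }
}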
Use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.
The class InternalSignificantTerms, method doReduce:
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    long globalSubsetSize = 0;
    long globalSupersetSize = 0;
    // Compute the overall result set size and the corpus size using the
    // top-level aggregations from each shard
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
        globalSubsetSize += terms.getSubsetSize();
        globalSupersetSize += terms.getSupersetSize();
    }
    Map<String, List<B>> buckets = new HashMap<>();
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
        for (B bucket : terms.getBucketsInternal()) {
            List<B> existingBuckets = buckets.get(bucket.getKeyAsString());
            if (existingBuckets == null) {
                existingBuckets = new ArrayList<>(aggregations.size());
                buckets.put(bucket.getKeyAsString(), existingBuckets);
            }
            // Adjust the buckets with the global stats representing the
            // total size of the pots from which the stats are drawn
            existingBuckets.add(bucket.newBucket(bucket.getSubsetDf(), globalSubsetSize, bucket.getSupersetDf(),
                    globalSupersetSize, bucket.aggregations));
        }
    }
    SignificanceHeuristic heuristic = getSignificanceHeuristic().rewrite(reduceContext);
    final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
    BucketSignificancePriorityQueue<B> ordered = new BucketSignificancePriorityQueue<>(size);
    for (Map.Entry<String, List<B>> entry : buckets.entrySet()) {
        List<B> sameTermBuckets = entry.getValue();
        final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        b.updateScore(heuristic);
        if (((b.score > 0) && (b.subsetDf >= minDocCount)) || reduceContext.isFinalReduce() == false) {
            ordered.insertWithOverflow(b);
        }
    }
    B[] list = createBucketsArray(ordered.size());
    for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = ordered.pop();
    }
    return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list));
}
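The reduce above makes two passes because a bucket's significance can only be scored against the global subset and superset sizes, which are not known until every shard's response has been seen. Below is a self-contained sketch of that pattern with a toy "lift" heuristic standing in for the real SignificanceHeuristic; ShardBucket and lift are illustrative names only.

// A sketch of the two-pass significant-terms reduce: sum the global set
// sizes first, then merge and score each term against those global sizes.
import java.util.List;

public class SignificanceReduceSketch {

    record ShardBucket(String term, long subsetDf, long subsetSize, long supersetDf, long supersetSize) {}

    // A toy significance heuristic: how much more frequent is the term in the
    // subset (foreground) than in the superset (background)?
    static double lift(long subsetDf, long subsetSize, long supersetDf, long supersetSize) {
        double fg = (double) subsetDf / subsetSize;
        double bg = (double) supersetDf / supersetSize;
        return fg / bg;
    }

    public static void main(String[] args) {
        List<ShardBucket> shards = List.of(
                new ShardBucket("kayak", 10, 100, 30, 10_000),
                new ShardBucket("kayak", 5, 50, 20, 5_000));

        // Pass 1: global set sizes are the sums over all shards.
        long globalSubsetSize = 0, globalSupersetSize = 0;
        for (ShardBucket s : shards) {
            globalSubsetSize += s.subsetSize();
            globalSupersetSize += s.supersetSize();
        }

        // Pass 2: merge the document frequencies, then score against the
        // *global* sizes; scoring per shard and averaging would be wrong.
        long subsetDf = 0, supersetDf = 0;
        for (ShardBucket s : shards) {
            subsetDf += s.subsetDf();
            supersetDf += s.supersetDf();
        }
        System.out.println("score = " + lift(subsetDf, globalSubsetSize, supersetDf, globalSupersetSize));
    }
}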
Use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.
The class InternalHistogram, method reduceBuckets:
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {
        @Override
        protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
            return a.current.key < b.current.key;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        InternalHistogram histogram = (InternalHistogram) aggregation;
        if (histogram.buckets.isEmpty() == false) {
            pq.add(new IteratorAndCurrent(histogram.buckets.iterator()));
        }
    }
    List<Bucket> reducedBuckets = new ArrayList<>();
    if (pq.size() > 0) {
        // list of buckets coming from different shards that have the same key
        List<Bucket> currentBuckets = new ArrayList<>();
        double key = pq.top().current.key;
        do {
            final IteratorAndCurrent top = pq.top();
            if (top.current.key != key) {
                // the key changes, reduce what we already buffered and reset the buffer for current buckets
                final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                    reducedBuckets.add(reduced);
                }
                currentBuckets.clear();
                key = top.current.key;
            }
            currentBuckets.add(top.current);
            if (top.iterator.hasNext()) {
                final Bucket next = top.iterator.next();
                assert next.key > top.current.key : "shards must return data sorted by key";
                top.current = next;
                pq.updateTop();
            } else {
                pq.pop();
            }
        } while (pq.size() > 0);
        if (currentBuckets.isEmpty() == false) {
            final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
            if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                reducedBuckets.add(reduced);
            }
        }
    }
    return reducedBuckets;
}
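Because each shard returns its histogram buckets sorted by key, the method above can merge them in one pass with a heap of iterators, buffering same-key buckets and reducing each group as soon as the key advances. Here is a minimal standalone version of that k-way merge; it uses java.util.PriorityQueue rather than the Lucene PriorityQueue used above (whose top()/updateTop() allow advancing the head in place), and all names are illustrative only.

// A self-contained k-way merge over per-shard lists sorted by key,
// summing the doc counts of buckets that share the same key.
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {

    record KeyedCount(double key, long docCount) {}

    // Holds one shard's iterator plus the element it currently points at.
    static final class IteratorAndCurrent {
        final Iterator<KeyedCount> iterator;
        KeyedCount current;
        IteratorAndCurrent(Iterator<KeyedCount> iterator) {
            this.iterator = iterator;
            this.current = iterator.next();
        }
    }

    // Merges the shards in a single pass using a heap of size = number of
    // shards, so memory stays small no matter how many buckets there are.
    static List<KeyedCount> merge(List<List<KeyedCount>> shards) {
        PriorityQueue<IteratorAndCurrent> pq =
                new PriorityQueue<>(Comparator.comparingDouble((IteratorAndCurrent ic) -> ic.current.key()));
        for (List<KeyedCount> shard : shards) {
            if (!shard.isEmpty()) {
                pq.add(new IteratorAndCurrent(shard.iterator()));
            }
        }
        List<KeyedCount> out = new ArrayList<>();
        while (!pq.isEmpty()) {
            IteratorAndCurrent top = pq.poll(); // smallest current key
            double key = top.current.key();
            long sum = 0;
            // Drain every iterator whose current element has this key.
            while (true) {
                sum += top.current.docCount();
                if (top.iterator.hasNext()) {
                    top.current = top.iterator.next();
                    pq.add(top);
                }
                if (pq.isEmpty() || pq.peek().current.key() != key) {
                    break;
                }
                top = pq.poll();
            }
            out.add(new KeyedCount(key, sum));
        }
        return out;
    }

    public static void main(String[] args) {
        List<KeyedCount> a = List.of(new KeyedCount(0, 3), new KeyedCount(10, 1));
        List<KeyedCount> b = List.of(new KeyedCount(10, 2), new KeyedCount(20, 5));
        merge(List.of(a, b)).forEach(kc ->
                System.out.println(kc.key() + " -> " + kc.docCount()));
        // prints: 0.0 -> 3, 10.0 -> 3, 20.0 -> 5
    }
}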
Use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.
The class InternalRange, method doReduce:
@SuppressWarnings("unchecked")
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    // One list per range; every shard returns the same ranges in the same
    // order, so buckets can be merged positionally.
    List<Bucket>[] rangeList = new List[ranges.size()];
    for (int i = 0; i < rangeList.length; ++i) {
        rangeList[i] = new ArrayList<>();
    }
    for (InternalAggregation aggregation : aggregations) {
        InternalRange<B, R> ranges = (InternalRange<B, R>) aggregation;
        int i = 0;
        for (Bucket range : ranges.ranges) {
            rangeList[i++].add(range);
        }
    }
    final List<B> ranges = new ArrayList<>();
    for (int i = 0; i < this.ranges.size(); ++i) {
        // Reduce all shard-level buckets for the i-th range into one bucket
        ranges.add((B) rangeList[i].get(0).reduce(rangeList[i], reduceContext));
    }
    return getFactory().create(name, ranges, format, keyed, pipelineAggregators(), getMetaData());
}
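Unlike the terms and histogram reduces, no keying is needed here: every shard evaluates the identical list of ranges in the same order, so the i-th buckets line up across shards and only their doc counts (and sub-aggregations) need merging. A minimal sketch of that positional merge, with RangeBucket as a hypothetical stand-in:

// Merging fixed ranges by index: shardResults.get(s).get(i) is the i-th
// range bucket of shard s, so merging is by position, not by key lookup.
import java.util.List;

public class RangeReduceSketch {

    record RangeBucket(double from, double to, long docCount) {}

    static long[] reduce(List<List<RangeBucket>> shardResults, int numRanges) {
        long[] docCounts = new long[numRanges];
        for (List<RangeBucket> shard : shardResults) {
            for (int i = 0; i < numRanges; i++) {
                docCounts[i] += shard.get(i).docCount();
            }
        }
        return docCounts;
    }

    public static void main(String[] args) {
        List<RangeBucket> shard0 = List.of(new RangeBucket(0, 50, 7), new RangeBucket(50, 100, 2));
        List<RangeBucket> shard1 = List.of(new RangeBucket(0, 50, 1), new RangeBucket(50, 100, 9));
        long[] merged = reduce(List.of(shard0, shard1), 2);
        System.out.println("[0,50): " + merged[0] + ", [50,100): " + merged[1]); // 8 and 11
    }
}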
Use of org.elasticsearch.search.aggregations.InternalAggregation in project elasticsearch by elastic.
The class InternalGeoHashGrid, method doReduce:
@Override
public InternalGeoHashGrid doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    // Group the shard-level buckets by their geohash cell
    LongObjectPagedHashMap<List<Bucket>> buckets = null;
    for (InternalAggregation aggregation : aggregations) {
        InternalGeoHashGrid grid = (InternalGeoHashGrid) aggregation;
        if (buckets == null) {
            buckets = new LongObjectPagedHashMap<>(grid.buckets.size(), reduceContext.bigArrays());
        }
        for (Bucket bucket : grid.buckets) {
            List<Bucket> existingBuckets = buckets.get(bucket.geohashAsLong);
            if (existingBuckets == null) {
                existingBuckets = new ArrayList<>(aggregations.size());
                buckets.put(bucket.geohashAsLong, existingBuckets);
            }
            existingBuckets.add(bucket);
        }
    }
    // Only trim to the requested size on the final reduce; partial reduces keep all cells
    final int size = Math.toIntExact(reduceContext.isFinalReduce() == false ? buckets.size()
            : Math.min(requiredSize, buckets.size()));
    BucketPriorityQueue ordered = new BucketPriorityQueue(size);
    for (LongObjectPagedHashMap.Cursor<List<Bucket>> cursor : buckets) {
        List<Bucket> sameCellBuckets = cursor.value;
        ordered.insertWithOverflow(sameCellBuckets.get(0).reduce(sameCellBuckets, reduceContext));
    }
    buckets.close();
    // Pop the queue in ascending order, filling the array from the back so it
    // ends up in descending order
    Bucket[] list = new Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = ordered.pop();
    }
    return new InternalGeoHashGrid(getName(), requiredSize, Arrays.asList(list), pipelineAggregators(), getMetaData());
}
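The final selection above relies on Lucene's PriorityQueue.insertWithOverflow(), a bounded min-heap insert that, once the queue holds `size` entries, evicts and returns whichever element falls out of the top `size`; popping into the array from the back then yields buckets in descending order. A sketch of the same top-N pattern with java.util.PriorityQueue (CellBucket is a hypothetical stand-in for a grid bucket):

// Bounded top-N selection: keep the N buckets with the highest doc counts
// in a min-heap of size N, so insertion is O(log N) and memory is bounded.
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class TopNSketch {

    record CellBucket(long geohash, long docCount) {}

    static List<CellBucket> topN(Iterable<CellBucket> merged, int n) {
        PriorityQueue<CellBucket> heap =
                new PriorityQueue<>(Comparator.comparingLong(CellBucket::docCount));
        for (CellBucket b : merged) {
            heap.offer(b);
            if (heap.size() > n) {
                heap.poll(); // evict the current smallest, like insertWithOverflow
            }
        }
        // Pop ascending and fill from the back, as the loop above does,
        // so the result ends up in descending doc-count order.
        CellBucket[] out = new CellBucket[heap.size()];
        for (int i = out.length - 1; i >= 0; i--) {
            out[i] = heap.poll();
        }
        return List.of(out);
    }

    public static void main(String[] args) {
        List<CellBucket> cells = List.of(
                new CellBucket(1L, 5), new CellBucket(2L, 42), new CellBucket(3L, 17));
        topN(cells, 2).forEach(c -> System.out.println(c.geohash() + ": " + c.docCount()));
        // prints geohash 2 (42 docs) then geohash 3 (17 docs)
    }
}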