Use of org.opensearch.search.aggregations.InternalAggregation in project OpenSearch by opensearch-project.
From class InternalMappedRareTerms, the method reduce:
@Override
public InternalAggregation reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    Map<Object, List<B>> buckets = new HashMap<>();
    InternalRareTerms<A, B> referenceTerms = null;
    SetBackedScalingCuckooFilter filter = null;
    for (InternalAggregation aggregation : aggregations) {
        // Unmapped rare terms don't carry a cuckoo filter, so skip them here
        // and save some type casting headaches later.
        if (aggregation.isMapped() == false) {
            continue;
        }
        @SuppressWarnings("unchecked")
        InternalRareTerms<A, B> terms = (InternalRareTerms<A, B>) aggregation;
        if (referenceTerms == null && aggregation.getClass().equals(UnmappedRareTerms.class) == false) {
            referenceTerms = terms;
        }
        if (referenceTerms != null
            && referenceTerms.getClass().equals(terms.getClass()) == false
            && terms.getClass().equals(UnmappedRareTerms.class) == false) {
            // control reaches this branch when the field the query was executed against
            // is of different types in different indices.
            throw new AggregationExecutionException(
                "Merging/Reducing the aggregations failed when computing the aggregation ["
                    + referenceTerms.getName()
                    + "] because the field you gave in the aggregation query existed as two different "
                    + "types in two different indices"
            );
        }
        for (B bucket : terms.getBuckets()) {
            List<B> bucketList = buckets.computeIfAbsent(bucket.getKey(), k -> new ArrayList<>());
            bucketList.add(bucket);
        }
        SetBackedScalingCuckooFilter otherFilter = ((InternalMappedRareTerms) aggregation).getFilter();
        if (filter == null) {
            filter = new SetBackedScalingCuckooFilter(otherFilter);
        } else {
            filter.merge(otherFilter);
        }
    }
    final List<B> rare = new ArrayList<>();
    for (List<B> sameTermBuckets : buckets.values()) {
        final B b = reduceBucket(sameTermBuckets, reduceContext);
        if (b.getDocCount() <= maxDocCount && containsTerm(filter, b) == false) {
            rare.add(b);
            reduceContext.consumeBucketsAndMaybeBreak(1);
        } else if (b.getDocCount() > maxDocCount) {
            // this term has gone over the threshold while merging, so add it to the filter.
            // Note this may happen during incremental reductions too.
            addToFilter(filter, b);
        }
    }
    CollectionUtil.introSort(rare, order.comparator());
    return createWithFilter(name, rare, filter);
}
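
In short, this reduce pass merges per-shard doc counts per term and keeps only terms that stay at or under maxDocCount and are absent from the filter of known-frequent terms. Below is a minimal, self-contained sketch of that discipline; the class and every name in it are hypothetical, and a plain HashSet stands in for SetBackedScalingCuckooFilter.

import java.util.*;

// Hypothetical sketch of the rare-terms reduce above: per-shard counts are
// merged, and a plain Set stands in for the scaling cuckoo filter that
// remembers terms already known to be too frequent.
class RareTermsReduceSketch {
    static final long MAX_DOC_COUNT = 1; // "rare" means at most this many docs

    static List<String> reduce(List<Map<String, Long>> perShardCounts, Set<String> tooFrequent) {
        Map<String, Long> merged = new HashMap<>();
        for (Map<String, Long> shard : perShardCounts) {
            shard.forEach((term, count) -> merged.merge(term, count, Long::sum));
        }
        List<String> rare = new ArrayList<>();
        for (Map.Entry<String, Long> e : merged.entrySet()) {
            if (e.getValue() <= MAX_DOC_COUNT && tooFrequent.contains(e.getKey()) == false) {
                rare.add(e.getKey());
            } else if (e.getValue() > MAX_DOC_COUNT) {
                // crossed the threshold during this merge; remember it so later
                // incremental reductions can reject it cheaply
                tooFrequent.add(e.getKey());
            }
        }
        Collections.sort(rare);
        return rare;
    }

    public static void main(String[] args) {
        Set<String> filter = new HashSet<>();
        List<String> rare = reduce(List.of(Map.of("a", 1L, "b", 1L), Map.of("b", 1L, "c", 1L)), filter);
        System.out.println(rare);   // [a, c] -- "b" totals 2 docs and is no longer rare
        System.out.println(filter); // [b]
    }
}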
Use of org.opensearch.search.aggregations.InternalAggregation in project OpenSearch by opensearch-project.
From class InternalTerms, the method reduceLegacy:
private List<B> reduceLegacy(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    Map<Object, List<B>> bucketMap = new HashMap<>();
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
        if (terms.getBuckets().isEmpty() == false) {
            for (B bucket : terms.getBuckets()) {
                List<B> bucketList = bucketMap.get(bucket.getKey());
                if (bucketList == null) {
                    bucketList = new ArrayList<>();
                    bucketMap.put(bucket.getKey(), bucketList);
                }
                bucketList.add(bucket);
            }
        }
    }
    List<B> reducedBuckets = new ArrayList<>();
    for (List<B> sameTermBuckets : bucketMap.values()) {
        final B b = reduceBucket(sameTermBuckets, reduceContext);
        reducedBuckets.add(b);
    }
    return reducedBuckets;
}
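
The get/null-check/put grouping above is behaviorally identical to the computeIfAbsent idiom used by InternalMappedRareTerms.reduce earlier on this page; computeIfAbsent creates the per-key list on first sight of a key. A small, self-contained sketch of that grouping step (the Bucket record and the sample data are hypothetical):

import java.util.*;

// Hypothetical sketch: group same-key buckets from all shards with
// computeIfAbsent, equivalent to the get/null-check/put dance in reduceLegacy.
class GroupByKeySketch {
    record Bucket(String key, long docCount) {}

    public static void main(String[] args) {
        List<Bucket> allShardBuckets = List.of(new Bucket("a", 2), new Bucket("b", 1), new Bucket("a", 3));
        Map<String, List<Bucket>> byKey = new HashMap<>();
        for (Bucket bucket : allShardBuckets) {
            byKey.computeIfAbsent(bucket.key(), k -> new ArrayList<>()).add(bucket);
        }
        // each value list now holds the same-key buckets that reduceBucket(...) would fold
        System.out.println(byKey); // e.g. {a=[Bucket[key=a, docCount=2], Bucket[key=a, docCount=3]], b=[...]} (map order may vary)
    }
}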
Use of org.opensearch.search.aggregations.InternalAggregation in project OpenSearch by opensearch-project.
From class InternalTerms, the method reduceMergeSort:
private List<B> reduceMergeSort(List<InternalAggregation> aggregations, BucketOrder thisReduceOrder, ReduceContext reduceContext) {
    assert isKeyOrder(thisReduceOrder);
    final Comparator<MultiBucketsAggregation.Bucket> cmp = thisReduceOrder.comparator();
    final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<IteratorAndCurrent<B>>(aggregations.size()) {
        @Override
        protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
            return cmp.compare(a.current(), b.current()) < 0;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
        if (terms.getBuckets().isEmpty() == false) {
            assert reduceOrder.equals(terms.reduceOrder);
            pq.add(new IteratorAndCurrent<>(terms.getBuckets().iterator()));
        }
    }
    List<B> reducedBuckets = new ArrayList<>();
    // list of buckets coming from different shards that have the same key
    List<B> currentBuckets = new ArrayList<>();
    B lastBucket = null;
    while (pq.size() > 0) {
        final IteratorAndCurrent<B> top = pq.top();
        assert lastBucket == null || cmp.compare(top.current(), lastBucket) >= 0;
        if (lastBucket != null && cmp.compare(top.current(), lastBucket) != 0) {
            // the key changed: reduce what we already buffered and reset the buffer for the new key
            final B reduced = reduceBucket(currentBuckets, reduceContext);
            reducedBuckets.add(reduced);
            currentBuckets.clear();
        }
        lastBucket = top.current();
        currentBuckets.add(top.current());
        if (top.hasNext()) {
            top.next();
            assert cmp.compare(top.current(), lastBucket) > 0 : "shards must return data sorted by key";
            pq.updateTop();
        } else {
            pq.pop();
        }
    }
    if (currentBuckets.isEmpty() == false) {
        final B reduced = reduceBucket(currentBuckets, reduceContext);
        reducedBuckets.add(reduced);
    }
    return reducedBuckets;
}
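
Because every shard returns its buckets already sorted by key, this reduce is a classic k-way merge: keep one cursor per shard stream, repeatedly take the smallest head, and fold runs of equal keys into one reduced bucket. A hypothetical, self-contained sketch using java.util.PriorityQueue in place of Lucene's PriorityQueue (all names and the (key, count) bucket shape are assumptions):

import java.util.*;

// Hypothetical sketch of the k-way merge above: each shard contributes a
// key-sorted stream, and runs of equal keys across shards are folded into a
// single (key, summed count) pair.
class MergeSortReduceSketch {
    record Cursor(Iterator<Map.Entry<String, Long>> it, Map.Entry<String, Long> current) {}

    static List<Map.Entry<String, Long>> reduce(List<SortedMap<String, Long>> shards) {
        PriorityQueue<Cursor> pq = new PriorityQueue<>(Comparator.comparing((Cursor c) -> c.current().getKey()));
        for (SortedMap<String, Long> shard : shards) {
            Iterator<Map.Entry<String, Long>> it = shard.entrySet().iterator();
            if (it.hasNext()) {
                pq.add(new Cursor(it, it.next()));
            }
        }
        List<Map.Entry<String, Long>> reduced = new ArrayList<>();
        String lastKey = null;
        long count = 0;
        while (pq.isEmpty() == false) {
            Cursor top = pq.poll();
            String key = top.current().getKey();
            if (lastKey != null && key.equals(lastKey) == false) {
                reduced.add(Map.entry(lastKey, count)); // key changed: flush the buffered run
                count = 0;
            }
            lastKey = key;
            count += top.current().getValue();
            if (top.it().hasNext()) {
                pq.add(new Cursor(top.it(), top.it().next())); // advance this shard's cursor
            }
        }
        if (lastKey != null) {
            reduced.add(Map.entry(lastKey, count));
        }
        return reduced;
    }

    public static void main(String[] args) {
        List<Map.Entry<String, Long>> out = reduce(List.of(
            new TreeMap<>(Map.of("a", 1L, "c", 2L)),
            new TreeMap<>(Map.of("a", 4L, "b", 1L))
        ));
        System.out.println(out); // [a=5, b=1, c=2]
    }
}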
Use of org.opensearch.search.aggregations.InternalAggregation in project OpenSearch by opensearch-project.
From class StringRareTermsAggregator, the method buildAggregations:
@Override
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
    /*
     * Collect the list of buckets, populate the filter with terms
     * that are too frequent, and figure out how to merge sub-buckets.
     */
    StringRareTerms.Bucket[][] rarestPerOrd = new StringRareTerms.Bucket[owningBucketOrds.length][];
    SetBackedScalingCuckooFilter[] filters = new SetBackedScalingCuckooFilter[owningBucketOrds.length];
    long keepCount = 0;
    long[] mergeMap = new long[(int) bucketOrds.size()];
    Arrays.fill(mergeMap, -1);
    long offset = 0;
    for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) {
        try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(1, context.bigArrays())) {
            filters[owningOrdIdx] = newFilter();
            List<StringRareTerms.Bucket> builtBuckets = new ArrayList<>();
            BytesKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]);
            BytesRef scratch = new BytesRef();
            while (collectedBuckets.next()) {
                collectedBuckets.readValue(scratch);
                long docCount = bucketDocCount(collectedBuckets.ord());
                // if the key is below the threshold, reinsert it into the new ords
                if (docCount <= maxDocCount) {
                    StringRareTerms.Bucket bucket = new StringRareTerms.Bucket(BytesRef.deepCopyOf(scratch), docCount, null, format);
                    bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(scratch);
                    mergeMap[(int) collectedBuckets.ord()] = bucket.bucketOrd;
                    builtBuckets.add(bucket);
                    keepCount++;
                } else {
                    filters[owningOrdIdx].add(scratch);
                }
            }
            rarestPerOrd[owningOrdIdx] = builtBuckets.toArray(new StringRareTerms.Bucket[0]);
            offset += bucketsInThisOwningBucketToCollect.size();
        }
    }
    /*
     * Only merge/delete the ordinals if we have actually deleted one,
     * to save on some redundant work.
     */
    if (keepCount != mergeMap.length) {
        mergeBuckets(mergeMap, offset);
        if (deferringCollector != null) {
            deferringCollector.mergeBuckets(mergeMap);
        }
    }
    /*
     * Now build the results!
     */
    buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs);
    InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length];
    for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
        Arrays.sort(rarestPerOrd[ordIdx], ORDER.comparator());
        result[ordIdx] = new StringRareTerms(name, ORDER, metadata(), format, Arrays.asList(rarestPerOrd[ordIdx]), maxDocCount, filters[ordIdx]);
    }
    return result;
}
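
The mergeMap built above is simply an old-ordinal to new-ordinal table: buckets that stay at or under the threshold receive densely packed new ordinals, while dropped buckets map to -1 (their terms are remembered by the cuckoo filter instead). A hypothetical, self-contained sketch of that compaction:

import java.util.Arrays;

// Hypothetical sketch of the mergeMap compaction: ordinals of surviving
// buckets are repacked densely; dropped buckets map to -1.
class OrdinalCompactionSketch {
    public static void main(String[] args) {
        long[] docCounts = { 1, 5, 1, 3, 1 }; // indexed by old bucket ordinal
        long maxDocCount = 1;

        long[] mergeMap = new long[docCounts.length];
        Arrays.fill(mergeMap, -1);
        long next = 0;
        for (int oldOrd = 0; oldOrd < docCounts.length; oldOrd++) {
            if (docCounts[oldOrd] <= maxDocCount) {
                mergeMap[oldOrd] = next++; // keep: assign the next compact ordinal
            }
        }
        System.out.println(Arrays.toString(mergeMap)); // [0, -1, 1, -1, 2]
    }
}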
Use of org.opensearch.search.aggregations.InternalAggregation in project OpenSearch by opensearch-project.
From class DoubleTerms, the method reduce:
@Override
public InternalAggregation reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    boolean promoteToDouble = false;
    for (InternalAggregation agg : aggregations) {
        if (agg instanceof LongTerms
            && (((LongTerms) agg).format == DocValueFormat.RAW || ((LongTerms) agg).format == DocValueFormat.UNSIGNED_LONG_SHIFTED)) {
            /*
             * this terms agg mixes longs and doubles, so we must promote longs to doubles
             * to make the internal aggs compatible
             */
            promoteToDouble = true;
            break;
        }
    }
    if (promoteToDouble == false) {
        return super.reduce(aggregations, reduceContext);
    }
    List<InternalAggregation> newAggs = new ArrayList<>(aggregations.size());
    for (InternalAggregation agg : aggregations) {
        if (agg instanceof LongTerms) {
            DoubleTerms dTerms = LongTerms.convertLongTermsToDouble((LongTerms) agg, format);
            newAggs.add(dTerms);
        } else {
            newAggs.add(agg);
        }
    }
    return newAggs.get(0).reduce(newAggs, reduceContext);
}
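
The promotion step amounts to widening raw long values to double whenever the shards disagree on the numeric type, so a single comparator and sum path applies to every shard. A hypothetical, self-contained sketch of that rule (the class and method names are assumptions):

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of the numeric promotion above: when a value list mixes
// Long and Double, widen every Long to Double; homogeneous lists pass through.
class PromoteToDoubleSketch {
    static List<Number> promoteIfMixed(List<Number> values) {
        boolean hasLong = values.stream().anyMatch(v -> v instanceof Long);
        boolean hasDouble = values.stream().anyMatch(v -> v instanceof Double);
        if (hasLong == false || hasDouble == false) {
            return values; // homogeneous: nothing to promote
        }
        List<Number> promoted = new ArrayList<>(values.size());
        for (Number v : values) {
            if (v instanceof Long) {
                promoted.add(v.doubleValue()); // widening long -> double, lossless up to 2^53
            } else {
                promoted.add(v);
            }
        }
        return promoted;
    }

    public static void main(String[] args) {
        System.out.println(promoteIfMixed(List.of(1L, 2.5, 3L))); // [1.0, 2.5, 3.0]
        System.out.println(promoteIfMixed(List.of(1L, 2L)));      // [1, 2] (untouched)
    }
}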