Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.
From the class LongTermsAggregator, method buildAggregation:
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (bucketCountThresholds.getMinDocCount() == 0
            && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill in the blanks
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedNumericDocValues values = getValues(valuesSource, ctx);
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    long value = values.valueAt(i);
                    if (longFilter == null || longFilter.accept(value)) {
                        bucketOrds.add(value);
                    }
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long otherDocCount = 0;
    BucketPriorityQueue<LongTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    LongTerms.Bucket spare = null;
    for (long i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new LongTerms.Bucket(0, 0, null, showTermDocCountError, 0, format);
        }
        spare.term = bucketOrds.get(i);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
        }
    }
    // Get the top buckets
    final LongTerms.Bucket[] list = new LongTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final LongTerms.Bucket bucket = ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        list[i].aggregations = bucketAggregations(list[i].bucketOrd);
        list[i].docCountError = 0;
    }
    return new LongTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
            pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
            otherDocCount, Arrays.asList(list), 0);
}
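The spare-reuse pattern above is worth pausing on. BucketPriorityQueue extends Lucene's org.apache.lucene.util.PriorityQueue, whose insertWithOverflow returns null while the queue is still filling up, returns the evicted element once the queue is full, or hands the candidate straight back when it does not beat the current head. Any non-null return value is safe to recycle as the next spare, which is why the hot loop allocates a new bucket only when the previous one was actually kept. Below is a minimal standalone sketch of that idiom against Lucene's PriorityQueue directly; Bucket, TopBucketQueue, and SpareReuseDemo are hypothetical names for illustration, not Elasticsearch classes.

import org.apache.lucene.util.PriorityQueue;

class Bucket {
    long term;
    long docCount;
}

class TopBucketQueue extends PriorityQueue<Bucket> {
    TopBucketQueue(int maxSize) {
        super(maxSize);
    }

    @Override
    protected boolean lessThan(Bucket a, Bucket b) {
        // Keep the buckets with the highest doc counts; the queue head is
        // the "worst" retained bucket, i.e. the first one to be evicted.
        return a.docCount < b.docCount;
    }
}

public class SpareReuseDemo {
    public static void main(String[] args) {
        TopBucketQueue ordered = new TopBucketQueue(2); // a shard size of 2
        Bucket spare = null;
        long[][] candidates = { {1, 10}, {2, 5}, {3, 7}, {4, 1} }; // {term, docCount}
        for (long[] c : candidates) {
            if (spare == null) {
                spare = new Bucket(); // allocate only when the previous spare was kept
            }
            spare.term = c[0];
            spare.docCount = c[1];
            // insertWithOverflow returns either the evicted element or, if the
            // candidate did not make the cut, the candidate itself; both are
            // safe to recycle. It returns null while the queue is filling up.
            spare = ordered.insertWithOverflow(spare);
        }
        while (ordered.size() > 0) {
            Bucket b = ordered.pop();
            System.out.println(b.term + " -> " + b.docCount); // prints 3 -> 7 then 1 -> 10
        }
    }
}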
Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.
From the class GlobalOrdinalsStringTermsAggregator, method buildAggregation:
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    if (globalOrds == null) {
        // no context in this reader
        return buildEmptyAggregation();
    }
    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets than maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long otherDocCount = 0;
    BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        otherDocCount += bucketDocCount;
        spare.globalOrd = globalTermOrd;
        spare.bucketOrd = bucketOrd;
        spare.docCount = bucketDocCount;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
            if (spare == null) {
                spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
            }
        }
    }
    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final OrdBucket bucket = ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        BytesRef scratch = new BytesRef();
        copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
        list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
        list[i].bucketOrd = bucket.bucketOrd;
        otherDocCount -= list[i].docCount;
    }
    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        StringTerms.Bucket bucket = list[i];
        bucket.aggregations = bucket.docCount == 0 ? bucketEmptyAggregations() : bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }
    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
            pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
            otherDocCount, Arrays.asList(list), 0);
}
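Note how otherDocCount is maintained in both snippets so far: every candidate bucket's doc count is added during the scan, and the survivors' counts are subtracted back out when the queue is drained, so what remains is exactly the number of documents hidden behind buckets that did not make the cut. A toy sketch of that two-phase accounting, with invented numbers:

// Toy sketch of the add-all-then-subtract-survivors accounting used above.
// The counts are made up; only the bookkeeping pattern is the point.
public class OtherDocCountDemo {
    public static void main(String[] args) {
        long[] candidateCounts = {10, 5, 7, 1}; // every bucket seen during the scan
        long[] survivorCounts = {10, 7};        // the top buckets popped from the queue
        long otherDocCount = 0;
        for (long c : candidateCounts) {
            otherDocCount += c; // phase 1: count every candidate
        }
        for (long s : survivorCounts) {
            otherDocCount -= s; // phase 2: remove the buckets that survived
        }
        System.out.println(otherDocCount); // 6: the 5 + 1 that fell outside the top buckets
    }
}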
Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.
From the class InternalTerms, method doReduce:
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    Map<Object, List<B>> buckets = new HashMap<>();
    long sumDocCountError = 0;
    long otherDocCount = 0;
    InternalTerms<A, B> referenceTerms = null;
    for (InternalAggregation aggregation : aggregations) {
        @SuppressWarnings("unchecked")
        InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
        if (referenceTerms == null && !aggregation.getClass().equals(UnmappedTerms.class)) {
            referenceTerms = terms;
        }
        if (referenceTerms != null
                && !referenceTerms.getClass().equals(terms.getClass())
                && !terms.getClass().equals(UnmappedTerms.class)) {
            // the field the aggregation runs on has different types in different indices
            throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation ["
                    + referenceTerms.getName() + "] because the field you gave in the aggregation query existed as two different "
                    + "types in two different indices");
        }
        otherDocCount += terms.getSumOfOtherDocCounts();
        final long thisAggDocCountError;
        if (terms.getBucketsInternal().size() < getShardSize() || InternalOrder.isTermOrder(order)) {
            thisAggDocCountError = 0;
        } else if (InternalOrder.isCountDesc(this.order)) {
            if (terms.getDocCountError() > 0) {
                // If there is an existing docCountError for this agg then
                // use this as the error for this aggregation
                thisAggDocCountError = terms.getDocCountError();
            } else {
                // otherwise use the doc count of the last term in the
                // aggregation
                thisAggDocCountError = terms.getBucketsInternal().get(terms.getBucketsInternal().size() - 1).docCount;
            }
        } else {
            thisAggDocCountError = -1;
        }
        if (sumDocCountError != -1) {
            if (thisAggDocCountError == -1) {
                sumDocCountError = -1;
            } else {
                sumDocCountError += thisAggDocCountError;
            }
        }
        setDocCountError(thisAggDocCountError);
        for (B bucket : terms.getBucketsInternal()) {
            // If there is already a doc count error for this bucket,
            // subtract this agg's doc count error from it to make the
            // new value for the bucket. This then means that when the
            // final error for the bucket is calculated below we account
            // for the existing error calculated in a previous reduce.
            // Note that if the error is unbounded (-1) this will be fixed
            // later in this method.
            bucket.docCountError -= thisAggDocCountError;
            List<B> bucketList = buckets.get(bucket.getKey());
            if (bucketList == null) {
                bucketList = new ArrayList<>();
                buckets.put(bucket.getKey(), bucketList);
            }
            bucketList.add(bucket);
        }
    }
    final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
    final BucketPriorityQueue<B> ordered = new BucketPriorityQueue<>(size, order.comparator(null));
    for (List<B> sameTermBuckets : buckets.values()) {
        final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
        if (sumDocCountError == -1) {
            b.docCountError = -1;
        } else {
            b.docCountError += sumDocCountError;
        }
        if (b.docCount >= minDocCount || reduceContext.isFinalReduce() == false) {
            B removed = ordered.insertWithOverflow(b);
            if (removed != null) {
                otherDocCount += removed.getDocCount();
            }
        }
    }
    B[] list = createBucketsArray(ordered.size());
    for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = ordered.pop();
    }
    long docCountError;
    if (sumDocCountError == -1) {
        docCountError = -1;
    } else {
        docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
    }
    return create(name, Arrays.asList(list), docCountError, otherDocCount);
}
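The error bookkeeping in doReduce follows a simple worst-case rule: a shard response with fewer buckets than the shard size (or one sorted by term) is exact and contributes 0; a count-descending response that was truncated contributes either its previously computed error or, failing that, the doc count of its last (smallest) returned bucket, since any term that shard did not report could have had up to that many documents. A hedged, self-contained sketch of that rule, with invented numbers:

// Standalone illustration of the per-shard docCountError rule used above,
// for a count-descending terms aggregation. All names and numbers here are
// hypothetical; this is not Elasticsearch code.
public class DocCountErrorDemo {
    // error contributed by one shard response
    static long shardError(long[] returnedDocCountsDesc, int shardSize, long reportedError) {
        if (returnedDocCountsDesc.length < shardSize) {
            return 0; // the shard returned everything it had: no terms were cut off
        }
        if (reportedError > 0) {
            return reportedError; // an error already computed in a previous reduce
        }
        // worst case: a missing term could have had up to the smallest
        // returned doc count on this shard
        return returnedDocCountsDesc[returnedDocCountsDesc.length - 1];
    }

    public static void main(String[] args) {
        // shard 1 was truncated at shardSize 3; its smallest returned count is 4
        long e1 = shardError(new long[] {9, 6, 4}, 3, 0);
        // shard 2 held fewer terms than shardSize, so it is exact
        long e2 = shardError(new long[] {8, 2}, 3, 0);
        System.out.println("worst-case error = " + (e1 + e2)); // 4 + 0 = 4
    }
}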
Use of org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue in project elasticsearch by elastic.
From the class StringTermsAggregator, method buildAggregation:
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (bucketCountThresholds.getMinDocCount() == 0
            && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill in the blanks
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
            // brute force
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    final BytesRef term = values.valueAt(i);
                    if (includeExclude == null || includeExclude.accept(term)) {
                        bucketOrds.add(term);
                    }
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long otherDocCount = 0;
    BucketPriorityQueue<StringTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
        }
    }
    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }
    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        final StringTerms.Bucket bucket = list[i];
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }
    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
            pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
            otherDocCount, Arrays.asList(list), 0);
}
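One detail specific to this string variant: bucketOrds.get(i, spare.termBytes) only points the spare's BytesRef at the hash table's backing storage, which is why each surviving bucket's term is materialized with BytesRef.deepCopyOf before the aggregation is returned. A minimal sketch of why the shallow view is unsafe, using Lucene's BytesRef (the variable names are illustrative):

import org.apache.lucene.util.BytesRef;

public class DeepCopyDemo {
    public static void main(String[] args) {
        byte[] shared = "foo".getBytes(java.nio.charset.StandardCharsets.UTF_8);
        BytesRef shallow = new BytesRef(shared);      // a view over the shared bytes
        BytesRef safe = BytesRef.deepCopyOf(shallow); // a private copy of the bytes
        shared[0] = 'z';                              // the backing storage gets recycled
        System.out.println(shallow.utf8ToString());   // "zoo": the view is corrupted
        System.out.println(safe.utf8ToString());      // "foo": the copy is intact
    }
}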