Use of org.elasticsearch.index.fielddata.SortedBinaryDocValues in project elasticsearch by elastic: class StringTermsAggregator, method getLeafCollector.
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
    final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
    return new LeafBucketCollectorBase(sub, values) {
        final BytesRefBuilder previous = new BytesRefBuilder();

        @Override
        public void collect(int doc, long bucket) throws IOException {
            assert bucket == 0;
            values.setDocument(doc);
            final int valuesCount = values.count();

            // SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups
            previous.clear();
            for (int i = 0; i < valuesCount; ++i) {
                final BytesRef bytes = values.valueAt(i);
                if (includeExclude != null && !includeExclude.accept(bytes)) {
                    continue;
                }
                if (previous.get().equals(bytes)) {
                    continue;
                }
                long bucketOrdinal = bucketOrds.add(bytes);
                if (bucketOrdinal < 0) { // already seen
                    bucketOrdinal = -1 - bucketOrdinal;
                    collectExistingBucket(sub, doc, bucketOrdinal);
                } else {
                    collectBucket(sub, doc, bucketOrdinal);
                }
                previous.copyBytes(bytes);
            }
        }
    };
}
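The sign of the value returned by bucketOrds.add above encodes whether the term was already seen: a non-negative result is a freshly assigned ordinal, while a negative result is the existing ordinal encoded as -1 - ordinal. Below is a minimal, self-contained sketch of that contract using a plain HashMap; the OrdinalMap class is a hypothetical stand-in for illustration, not the BytesRefHash that Elasticsearch actually uses.

import java.util.HashMap;
import java.util.Map;

// Hypothetical illustration of the add()/ordinal contract assumed above.
class OrdinalMap {
    private final Map<String, Long> ords = new HashMap<>();

    // Returns a fresh ordinal for an unseen term, or (-1 - existingOrdinal)
    // when the term has already been added.
    long add(String term) {
        Long existing = ords.get(term);
        if (existing != null) {
            return -1 - existing;
        }
        long ord = ords.size();
        ords.put(term, ord);
        return ord;
    }
}

A caller recovers the existing ordinal exactly as collect does: if (ord < 0) ord = -1 - ord;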
Use of org.elasticsearch.index.fielddata.SortedBinaryDocValues in project elasticsearch by elastic: class StringTermsAggregator, method buildAggregation.
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    if (bucketCountThresholds.getMinDocCount() == 0
            && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill-in the blanks
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedBinaryDocValues values = valuesSource.bytesValues(ctx);
            // brute force
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    final BytesRef term = values.valueAt(i);
                    if (includeExclude == null || includeExclude.accept(term)) {
                        bucketOrds.add(term);
                    }
                }
            }
        }
    }

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());

    long otherDocCount = 0;
    BucketPriorityQueue<StringTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = ordered.insertWithOverflow(spare);
        }
    }

    // Get the top buckets
    final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }

    // replay any deferred collections
    runDeferredCollections(survivingBucketOrds);

    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) list[i];
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }

    return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
            pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
            otherDocCount, Arrays.asList(list), 0);
}
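For reference, both methods read SortedBinaryDocValues with the same access pattern: setDocument to position on a document, count to get the number of values, then valueAt for each index. Values come back sorted but may contain duplicates, and the returned BytesRef may reuse its buffer between calls. The helper below is a hypothetical sketch of that pattern, not Elasticsearch code; it assumes only the SortedBinaryDocValues API shown above.

import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;

// Hypothetical helper class, for illustration only.
class DocValuesExample {
    // Collects the distinct terms of one document.
    static Set<BytesRef> uniqueTermsForDoc(SortedBinaryDocValues values, int docId) {
        values.setDocument(docId);               // position on the document
        final int valueCount = values.count();   // number of values for this doc
        final Set<BytesRef> unique = new HashSet<>();
        for (int i = 0; i < valueCount; ++i) {
            // deep-copy because valueAt(i) may reuse its underlying buffer
            unique.add(BytesRef.deepCopyOf(values.valueAt(i)));
        }
        return unique;
    }
}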