Use of org.opensearch.common.util.BytesRefHash in project OpenSearch (opensearch-project): the buildAggregations method of the StringRareTermsAggregator class.
/**
 * Builds one {@link InternalAggregation} per owning bucket ordinal, keeping only the
 * buckets whose doc count is at or below {@code maxDocCount} ("rare" terms) and
 * recording every too-frequent term in a cuckoo filter instead.
 *
 * @param owningBucketOrds ordinals of the owning buckets to build results for
 * @return one {@link StringRareTerms} aggregation per entry of {@code owningBucketOrds}
 * @throws IOException if building sub-aggregations fails
 */
@Override
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
    /*
     * Collect the list of buckets, populate the filter with terms
     * that are too frequent, and figure out how to merge sub-buckets.
     */
    // rarestPerOrd[i] holds the rare buckets kept for owningBucketOrds[i];
    // filters[i] accumulates the terms that were too frequent to count as rare.
    StringRareTerms.Bucket[][] rarestPerOrd = new StringRareTerms.Bucket[owningBucketOrds.length][];
    SetBackedScalingCuckooFilter[] filters = new SetBackedScalingCuckooFilter[owningBucketOrds.length];
    long keepCount = 0;
    // mergeMap[oldOrd] = new ordinal after pruning, or -1 when the bucket is dropped.
    long[] mergeMap = new long[(int) bucketOrds.size()];
    Arrays.fill(mergeMap, -1);
    // Base for the new ordinals; kept buckets are renumbered contiguously,
    // grouped per owning bucket.
    long offset = 0;
    for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) {
        // Scratch hash used only to assign dense local slots to the kept keys;
        // released at the end of each iteration via try-with-resources.
        try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(1, context.bigArrays())) {
            filters[owningOrdIdx] = newFilter();
            List<StringRareTerms.Bucket> builtBuckets = new ArrayList<>();
            BytesKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]);
            BytesRef scratch = new BytesRef();
            while (collectedBuckets.next()) {
                collectedBuckets.readValue(scratch);
                long docCount = bucketDocCount(collectedBuckets.ord());
                // if the key is below threshold, reinsert into the new ords
                if (docCount <= maxDocCount) {
                    // deepCopyOf: scratch is reused by the enum, so the bucket
                    // needs its own copy of the key bytes.
                    StringRareTerms.Bucket bucket = new StringRareTerms.Bucket(BytesRef.deepCopyOf(scratch), docCount, null, format);
                    // add() returns the insertion slot; each key appears at most once
                    // per owning bucket, so the result is a fresh non-negative index.
                    bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(scratch);
                    mergeMap[(int) collectedBuckets.ord()] = bucket.bucketOrd;
                    builtBuckets.add(bucket);
                    keepCount++;
                } else {
                    // Too frequent to be rare: remember the term in the filter
                    // (presumably consulted during reduce to exclude such terms
                    // across shards — TODO confirm against the reduce phase).
                    filters[owningOrdIdx].add(scratch);
                }
            }
            rarestPerOrd[owningOrdIdx] = builtBuckets.toArray(new StringRareTerms.Bucket[0]);
            offset += bucketsInThisOwningBucketToCollect.size();
        }
    }
    /*
     * Only merge/delete the ordinals if we have actually deleted one,
     * to save on some redundant work.
     */
    if (keepCount != mergeMap.length) {
        mergeBuckets(mergeMap, offset);
        if (deferringCollector != null) {
            deferringCollector.mergeBuckets(mergeMap);
        }
    }
    /*
     * Now build the results!
     */
    buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs);
    InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length];
    for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
        Arrays.sort(rarestPerOrd[ordIdx], ORDER.comparator());
        result[ordIdx] = new StringRareTerms(name, ORDER, metadata(), format, Arrays.asList(rarestPerOrd[ordIdx]), maxDocCount, filters[ordIdx]);
    }
    return result;
}
Use of org.opensearch.common.util.BytesRefHash in project OpenSearch (opensearch-project): the bytesLookup method of the SignificanceLookup class.
/**
 * Get the background frequency of a {@link BytesRef} term.
 *
 * @param bigArrays   allocator backing the memoization structures
 * @param cardinality upper bound on the number of owning buckets
 * @return a lookup that is uncached when cardinality is {@code ONE}, memoized otherwise
 */
BackgroundFrequencyForBytes bytesLookup(BigArrays bigArrays, CardinalityUpperBound cardinality) {
    if (cardinality == CardinalityUpperBound.ONE) {
        // A single owning bucket asks for each term at most once,
        // so there is nothing worth caching.
        return new BackgroundFrequencyForBytes() {
            @Override
            public long freq(BytesRef term) throws IOException {
                return getBackgroundFrequency(term);
            }

            @Override
            public void close() {}
        };
    }
    // Multiple owning buckets may request the same term repeatedly:
    // memoize frequencies, keyed by the term's slot in a BytesRefHash.
    return new BackgroundFrequencyForBytes() {
        private final BytesRefHash termToPosition = new BytesRefHash(1, bigArrays);
        private LongArray positionToFreq = bigArrays.newLongArray(1, false);

        @Override
        public long freq(BytesRef term) throws IOException {
            final long slot = termToPosition.add(term);
            if (slot >= 0) {
                // First sighting of this term: compute the frequency and cache it.
                final long freq = getBackgroundFrequency(term);
                positionToFreq = bigArrays.grow(positionToFreq, slot + 1);
                positionToFreq.set(slot, freq);
                return freq;
            }
            // Already present: add() encodes the existing slot as (-1 - slot).
            return positionToFreq.get(-1 - slot);
        }

        @Override
        public void close() {
            Releasables.close(termToPosition, positionToFreq);
        }
    };
}
Aggregations