Search in sources:

Example 1 with LongHash

Use of org.opensearch.common.util.LongHash in project OpenSearch by opensearch-project.

From the class BestBucketsDeferringCollector, the method prepareSelectedBuckets.

/**
 * Replay the wrapped collector, but only on a selection of buckets.
 */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (finished == false) {
        throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
        throw new IllegalStateException("Already been replayed");
    }
    this.selectedBuckets = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (long ord : selectedBuckets) {
        this.selectedBuckets.add(ord);
    }
    boolean needsScores = scoreMode().needsScores();
    Weight weight = null;
    if (needsScores) {
        Query query = isGlobal ? new MatchAllDocsQuery() : searchContext.query();
        weight = searchContext.searcher().createWeight(searchContext.searcher().rewrite(query), ScoreMode.COMPLETE, 1f);
    }
    for (Entry entry : entries) {
        assert entry.docDeltas.size() > 0 : "segment should have at least one document to replay, got 0";
        try {
            final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
            DocIdSetIterator scoreIt = null;
            if (needsScores) {
                Scorer scorer = weight.scorer(entry.context);
                // We don't need to check if the scorer is null
                // since we are sure that there are documents to replay (entry.docDeltas is not empty).
                scoreIt = scorer.iterator();
                leafCollector.setScorer(scorer);
            }
            final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
            final PackedLongValues.Iterator buckets = entry.buckets.iterator();
            int doc = 0;
            for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
                doc += (int) docDeltaIterator.next();
                final long bucket = buckets.next();
                final long rebasedBucket = this.selectedBuckets.find(bucket);
                if (rebasedBucket != -1) {
                    if (needsScores) {
                        if (scoreIt.docID() < doc) {
                            scoreIt.advance(doc);
                        }
                        // aggregations should only be replayed on matching documents
                        assert scoreIt.docID() == doc;
                    }
                    leafCollector.collect(doc, rebasedBucket);
                }
            }
        } catch (CollectionTerminatedException e) {
            // collection was terminated prematurely
            // continue with the following leaf
        }
    }
    collector.postCollection();
}
Also used: PackedLongValues (org.apache.lucene.util.packed.PackedLongValues), Query (org.apache.lucene.search.Query), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), LongHash (org.opensearch.common.util.LongHash), Scorer (org.apache.lucene.search.Scorer), Weight (org.apache.lucene.search.Weight), LeafBucketCollector (org.opensearch.search.aggregations.LeafBucketCollector), CollectionTerminatedException (org.apache.lucene.search.CollectionTerminatedException), DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)
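
Stripped of the scoring and iterator plumbing, the replay filter above rests on one LongHash contract: find() returns the dense ordinal that add() assigned to a key, or -1 for a key that was never added. A minimal standalone sketch of that contract (the demo class and sample values are invented for illustration; the LongHash and BigArrays calls are the ones used in the example):

import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.LongHash;

// Hypothetical demo class, not part of OpenSearch.
public class SelectedBucketsDemo {
    public static void main(String[] args) {
        long[] selected = { 3L, 7L, 42L };
        // As in prepareSelectedBuckets: hash the selected ordinals once up front.
        try (LongHash selectedBuckets = new LongHash(selected.length, BigArrays.NON_RECYCLING_INSTANCE)) {
            for (long ord : selected) {
                selectedBuckets.add(ord);
            }
            // find() returns a dense "rebased" ordinal for selected buckets and -1
            // otherwise, which is the per-document filter the replay loop applies
            // before calling leafCollector.collect(doc, rebasedBucket).
            for (long bucket : new long[] { 1L, 7L, 42L }) {
                long rebased = selectedBuckets.find(bucket);
                System.out.println("bucket " + bucket + (rebased == -1 ? " skipped" : " -> rebased ord " + rebased));
            }
        }
    }
}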

Example 2 with LongHash

Use of org.opensearch.common.util.LongHash in project OpenSearch by opensearch-project.

From the class LongRareTermsAggregator, the method buildAggregations.

@Override
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
    /*
     * Collect the list of buckets, populate the filter with terms
     * that are too frequent, and figure out how to merge sub-buckets.
     */
    LongRareTerms.Bucket[][] rarestPerOrd = new LongRareTerms.Bucket[owningBucketOrds.length][];
    SetBackedScalingCuckooFilter[] filters = new SetBackedScalingCuckooFilter[owningBucketOrds.length];
    long keepCount = 0;
    long[] mergeMap = new long[(int) bucketOrds.size()];
    Arrays.fill(mergeMap, -1);
    long offset = 0;
    for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) {
        try (LongHash bucketsInThisOwningBucketToCollect = new LongHash(1, context.bigArrays())) {
            filters[owningOrdIdx] = newFilter();
            List<LongRareTerms.Bucket> builtBuckets = new ArrayList<>();
            LongKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]);
            while (collectedBuckets.next()) {
                long docCount = bucketDocCount(collectedBuckets.ord());
                // if the key is below threshold, reinsert into the new ords
                if (docCount <= maxDocCount) {
                    LongRareTerms.Bucket bucket = new LongRareTerms.Bucket(collectedBuckets.value(), docCount, null, format);
                    bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(collectedBuckets.value());
                    mergeMap[(int) collectedBuckets.ord()] = bucket.bucketOrd;
                    builtBuckets.add(bucket);
                    keepCount++;
                } else {
                    filters[owningOrdIdx].add(collectedBuckets.value());
                }
            }
            rarestPerOrd[owningOrdIdx] = builtBuckets.toArray(new LongRareTerms.Bucket[0]);
            offset += bucketsInThisOwningBucketToCollect.size();
        }
    }
    /*
     * Only merge/delete the ordinals if we have actually deleted one,
     * to save on some redundant work.
     */
    if (keepCount != mergeMap.length) {
        mergeBuckets(mergeMap, offset);
        if (deferringCollector != null) {
            deferringCollector.mergeBuckets(mergeMap);
        }
    }
    /*
     * Now build the results!
     */
    buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs);
    InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length];
    for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
        Arrays.sort(rarestPerOrd[ordIdx], ORDER.comparator());
        result[ordIdx] = new LongRareTerms(name, ORDER, metadata(), format, Arrays.asList(rarestPerOrd[ordIdx]), maxDocCount, filters[ordIdx]);
    }
    return result;
}
Also used: LongHash (org.opensearch.common.util.LongHash), ArrayList (java.util.ArrayList), InternalAggregation (org.opensearch.search.aggregations.InternalAggregation), SetBackedScalingCuckooFilter (org.opensearch.common.util.SetBackedScalingCuckooFilter)
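
The rebasing trick in the loop above is worth isolating: each owning bucket gets its own LongHash, add() assigns dense ordinals 0..n-1 within it, and the running offset turns those into globally unique ordinals. A minimal sketch under the same assumption the aggregator relies on, namely that keys are distinct within an owning bucket so add() never returns a negative value (class name and sample data are invented):

import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.LongHash;

// Hypothetical demo class, not part of OpenSearch.
public class RebasedOrdsDemo {
    public static void main(String[] args) {
        long[][] keysPerOwningBucket = { { 10L, 20L }, { 20L, 30L, 40L } };
        long offset = 0;
        for (long[] keys : keysPerOwningBucket) {
            // One LongHash per owning bucket: add() hands out dense ords 0..n-1,
            // and the running offset keeps them unique across owning buckets.
            try (LongHash ords = new LongHash(1, BigArrays.NON_RECYCLING_INSTANCE)) {
                for (long key : keys) {
                    // Keys are distinct within an owning bucket here, so add()
                    // always returns a fresh non-negative ordinal.
                    long globalOrd = offset + ords.add(key);
                    System.out.println("key " + key + " -> global ord " + globalOrd);
                }
                offset += ords.size();
            }
        }
    }
}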

Example 3 with LongHash

Use of org.opensearch.common.util.LongHash in project OpenSearch by opensearch-project.

From the class SignificanceLookup, the method longLookup.

/**
 * Get the background frequency of a {@code long} term.
 */
BackgroundFrequencyForLong longLookup(BigArrays bigArrays, CardinalityUpperBound cardinality) {
    if (cardinality == CardinalityUpperBound.ONE) {
        return new BackgroundFrequencyForLong() {

            @Override
            public long freq(long term) throws IOException {
                return getBackgroundFrequency(term);
            }

            @Override
            public void close() {
            }
        };
    }
    return new BackgroundFrequencyForLong() {

        private final LongHash termToPosition = new LongHash(1, bigArrays);

        private LongArray positionToFreq = bigArrays.newLongArray(1, false);

        @Override
        public long freq(long term) throws IOException {
            long position = termToPosition.add(term);
            if (position < 0) {
                return positionToFreq.get(-1 - position);
            }
            long freq = getBackgroundFrequency(term);
            positionToFreq = bigArrays.grow(positionToFreq, position + 1);
            positionToFreq.set(position, freq);
            return freq;
        }

        @Override
        public void close() {
            Releasables.close(termToPosition, positionToFreq);
        }
    };
}
Also used: LongArray (org.opensearch.common.util.LongArray), LongHash (org.opensearch.common.util.LongHash)
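
This caching pattern hinges on the return value of LongHash.add(): a fresh key gets a new non-negative position, while an existing key comes back encoded as -1 - position. A minimal sketch of the same memoization idiom (the demo class, sample terms, and the term * 100 stand-in for getBackgroundFrequency are invented):

import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.LongArray;
import org.opensearch.common.util.LongHash;

// Hypothetical demo class, not part of OpenSearch.
public class CachedFreqDemo {
    public static void main(String[] args) {
        BigArrays bigArrays = BigArrays.NON_RECYCLING_INSTANCE;
        try (LongHash termToPosition = new LongHash(1, bigArrays)) {
            LongArray positionToFreq = bigArrays.newLongArray(1, false);
            for (long term : new long[] { 5L, 9L, 5L }) {
                long position = termToPosition.add(term);
                if (position < 0) {
                    // Existing key: add() encodes its slot as -1 - position,
                    // so the cached value can be read back without recomputing.
                    System.out.println(term + " cached -> " + positionToFreq.get(-1 - position));
                } else {
                    long freq = term * 100; // invented stand-in for getBackgroundFrequency(term)
                    positionToFreq = bigArrays.grow(positionToFreq, position + 1);
                    positionToFreq.set(position, freq);
                    System.out.println(term + " computed -> " + freq);
                }
            }
            positionToFreq.close();
        }
    }
}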

Aggregations

LongHash (org.opensearch.common.util.LongHash): 3
ArrayList (java.util.ArrayList): 1
CollectionTerminatedException (org.apache.lucene.search.CollectionTerminatedException): 1
DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 1
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 1
Query (org.apache.lucene.search.Query): 1
Scorer (org.apache.lucene.search.Scorer): 1
Weight (org.apache.lucene.search.Weight): 1
PackedLongValues (org.apache.lucene.util.packed.PackedLongValues): 1
LongArray (org.opensearch.common.util.LongArray): 1
SetBackedScalingCuckooFilter (org.opensearch.common.util.SetBackedScalingCuckooFilter): 1
InternalAggregation (org.opensearch.search.aggregations.InternalAggregation): 1
LeafBucketCollector (org.opensearch.search.aggregations.LeafBucketCollector): 1