Example 6 with PriorityQueue

Use of org.apache.lucene.util.PriorityQueue in the elasticsearch project by elastic.

From the class InternalHistogram, the method reduceBuckets:

private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {

        @Override
        protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
            return a.current.key < b.current.key;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        InternalHistogram histogram = (InternalHistogram) aggregation;
        if (histogram.buckets.isEmpty() == false) {
            pq.add(new IteratorAndCurrent(histogram.buckets.iterator()));
        }
    }
    List<Bucket> reducedBuckets = new ArrayList<>();
    if (pq.size() > 0) {
        // list of buckets coming from different shards that have the same key
        List<Bucket> currentBuckets = new ArrayList<>();
        double key = pq.top().current.key;
        do {
            final IteratorAndCurrent top = pq.top();
            if (top.current.key != key) {
                // the key changes, reduce what we already buffered and reset the buffer for current buckets
                final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                    reducedBuckets.add(reduced);
                }
                currentBuckets.clear();
                key = top.current.key;
            }
            currentBuckets.add(top.current);
            if (top.iterator.hasNext()) {
                final Bucket next = top.iterator.next();
                assert next.key > top.current.key : "shards must return data sorted by key";
                top.current = next;
                pq.updateTop();
            } else {
                pq.pop();
            }
        } while (pq.size() > 0);
        if (currentBuckets.isEmpty() == false) {
            final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
            if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                reducedBuckets.add(reduced);
            }
        }
    }
    return reducedBuckets;
}
Also used: InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation), ArrayList (java.util.ArrayList), PriorityQueue (org.apache.lucene.util.PriorityQueue)
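
The method above is a textbook k-way merge: each shard contributes a key-sorted bucket iterator, and the queue always keeps the iterator whose current bucket has the smallest key on top. Below is a minimal self-contained sketch of the same pattern over plain sorted integer lists; the class and method names are hypothetical, not Elasticsearch code.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.util.PriorityQueue;

class KWayMergeSketch {

    // Pairs a sorted source iterator with its current (smallest unconsumed) value.
    static class IteratorAndCurrent {
        final Iterator<Integer> iterator;
        int current;

        IteratorAndCurrent(Iterator<Integer> iterator) {
            this.iterator = iterator;
            this.current = iterator.next();
        }
    }

    static List<Integer> mergeSorted(List<List<Integer>> sources) {
        // Smallest current value on top, mirroring the histogram reduce above.
        PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(sources.size()) {
            @Override
            protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
                return a.current < b.current;
            }
        };
        for (List<Integer> source : sources) {
            if (source.isEmpty() == false) {
                pq.add(new IteratorAndCurrent(source.iterator()));
            }
        }
        List<Integer> merged = new ArrayList<>();
        while (pq.size() > 0) {
            IteratorAndCurrent top = pq.top();
            merged.add(top.current);
            if (top.iterator.hasNext()) {
                top.current = top.iterator.next();
                // The top changed in place; restore heap order without a remove/add pair.
                pq.updateTop();
            } else {
                pq.pop();
            }
        }
        return merged;
    }
}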

Example 7 with PriorityQueue

Use of org.apache.lucene.util.PriorityQueue in the lucene-solr project by apache.

From the class FacetFieldProcessor, the method findTopSlots:

/** Processes the collected data to find the top slots, and composes them into the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality, IntFunction<Comparable> bucketValFromSlotNumFunc, Function<Comparable, String> fieldQueryValFunc) throws IOException {
    int numBuckets = 0;
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // use max-int instead of max-long to avoid overflow
    long effectiveLimit = Integer.MAX_VALUE;
    if (freq.limit >= 0) {
        effectiveLimit = freq.limit;
        if (fcontext.isShard()) {
            if (freq.overrequest == -1) {
                // add over-request if this is a shard request and if we have a small offset (large offsets will already be gathering many more buckets than needed)
                if (freq.offset < 10) {
                    // default: add 10% plus 4 (to overrequest for very small limits)
                    effectiveLimit = (long) (effectiveLimit * 1.1 + 4);
                }
            } else {
                effectiveLimit += freq.overrequest;
            }
        }
    }
    final int sortMul = freq.sortDirection.getMultiplier();
    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        };
    } else {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
        };
    }
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {

        @Override
        protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
        }
    };
    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {
        // screen out buckets not matching mincount
        if (effectiveMincount > 0) {
            int count = countAcc.getCount(slotNum);
            if (count < effectiveMincount) {
                if (count > 0) {
                    // Still increment numBuckets as long as we have some count.  This is for consistency between distrib and non-distrib mode.
                    numBuckets++;
                }
                continue;
            }
        }
        numBuckets++;
        if (bottom != null) {
            // scratchSlot is only used to hold this slotNum for the following line
            scratchSlot.slot = slotNum;
            if (orderPredicate.test(bottom, scratchSlot)) {
                bottom.slot = slotNum;
                bottom = queue.updateTop();
            }
        } else if (effectiveLimit > 0) {
            // queue not full
            Slot s = new Slot();
            s.slot = slotNum;
            queue.add(s);
            if (queue.size() >= maxTopVals) {
                bottom = queue.top();
            }
        }
    }
    assert queue.size() <= numBuckets;
    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            calculateNumBuckets(res);
        }
    }
    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) {
        fdebug.putInfoItem("numBuckets", (long) numBuckets);
    }
    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", allBucketsAcc.getSpecialCount());
        // -1 slotNum is unused for SpecialSlotAcc
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }
    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
        res.add("missing", missingBucket);
    }
    // if we are deep paging, we don't have to order the highest "offset" counts.
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= maxTopVals;
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }
    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
        bucket.add("val", val);
        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, fieldQueryValFunc.apply(val)) : null;
        fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
        bucketList.add(bucket);
    }
    return res;
}
Also used: Query (org.apache.lucene.search.Query), ArrayList (java.util.ArrayList), PriorityQueue (org.apache.lucene.util.PriorityQueue), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)
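
One detail worth noting in findTopSlots is the allocation-free hot loop: once the queue is full, the weakest retained Slot (the heap top) is mutated in place and updateTop() re-sinks it, so no per-candidate objects are created. Below is a stripped-down sketch of that idiom for selecting the N largest counts; the names are hypothetical, not Solr code.

import org.apache.lucene.util.PriorityQueue;

class TopNSketch {

    static class Slot {
        int slot;
    }

    /** Returns the indexes of the n largest counts, highest first. */
    static int[] topNSlots(int[] counts, int n) {
        // Weakest retained slot on top, so it is cheap to test and replace.
        PriorityQueue<Slot> queue = new PriorityQueue<Slot>(n) {
            @Override
            protected boolean lessThan(Slot a, Slot b) {
                return counts[a.slot] < counts[b.slot];
            }
        };
        Slot bottom = null;
        for (int slotNum = 0; slotNum < counts.length; slotNum++) {
            if (bottom != null) {
                // Queue is full: overwrite the weakest entry in place, no allocation.
                if (counts[slotNum] > counts[bottom.slot]) {
                    bottom.slot = slotNum;
                    bottom = queue.updateTop();
                }
            } else {
                Slot s = new Slot();
                s.slot = slotNum;
                queue.add(s);
                if (queue.size() >= n) {
                    bottom = queue.top();
                }
            }
        }
        // pop() yields ascending counts, so fill the result array backwards.
        int[] sorted = new int[queue.size()];
        for (int i = sorted.length - 1; i >= 0; i--) {
            sorted[i] = queue.pop().slot;
        }
        return sorted;
    }
}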

Example 8 with PriorityQueue

Use of org.apache.lucene.util.PriorityQueue in the lucene-solr project by apache.

From the class MultiSorter, the method sort:

/** Does a merge sort of the leaves of the incoming readers, returning a {@link DocMap} to map each leaf's
 *  documents into the merged segment.  The documents of each incoming leaf reader must already be sorted by the same sort!
 *  Returns null if the merge sort is not needed (segments are already in index sort order).
 */
static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
    // TODO: optimize if only 1 reader is incoming, though that's a rare case
    SortField[] fields = sort.getSort();
    final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
    for (int i = 0; i < fields.length; i++) {
        comparables[i] = getComparableProviders(readers, fields[i]);
    }
    int leafCount = readers.size();
    PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {

        @Override
        public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
            for (int i = 0; i < comparables.length; i++) {
                int cmp = a.values[i].compareTo(b.values[i]);
                if (cmp != 0) {
                    return cmp < 0;
                }
            }
            // tie-break by docID natural order:
            if (a.readerIndex != b.readerIndex) {
                return a.readerIndex < b.readerIndex;
            } else {
                return a.docID < b.docID;
            }
        }
    };
    PackedLongValues.Builder[] builders = new PackedLongValues.Builder[leafCount];
    for (int i = 0; i < leafCount; i++) {
        CodecReader reader = readers.get(i);
        LeafAndDocID leaf = new LeafAndDocID(i, reader.getLiveDocs(), reader.maxDoc(), comparables.length);
        for (int j = 0; j < comparables.length; j++) {
            leaf.values[j] = comparables[j][i].getComparable(leaf.docID);
            assert leaf.values[j] != null;
        }
        queue.add(leaf);
        builders[i] = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    }
    // merge sort:
    int mappedDocID = 0;
    int lastReaderIndex = 0;
    boolean isSorted = true;
    while (queue.size() != 0) {
        LeafAndDocID top = queue.top();
        if (lastReaderIndex > top.readerIndex) {
            // merge sort is needed
            isSorted = false;
        }
        lastReaderIndex = top.readerIndex;
        builders[top.readerIndex].add(mappedDocID);
        if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
            mappedDocID++;
        }
        top.docID++;
        if (top.docID < top.maxDoc) {
            for (int j = 0; j < comparables.length; j++) {
                top.values[j] = comparables[j][top.readerIndex].getComparable(top.docID);
                assert top.values[j] != null;
            }
            queue.updateTop();
        } else {
            queue.pop();
        }
    }
    if (isSorted) {
        return null;
    }
    MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
    for (int i = 0; i < leafCount; i++) {
        final PackedLongValues remapped = builders[i].build();
        final Bits liveDocs = readers.get(i).getLiveDocs();
        docMaps[i] = new MergeState.DocMap() {

            @Override
            public int get(int docID) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    return (int) remapped.get(docID);
                } else {
                    return -1;
                }
            }
        };
    }
    return docMaps;
}
Also used: PackedLongValues (org.apache.lucene.util.packed.PackedLongValues), SortField (org.apache.lucene.search.SortField), PriorityQueue (org.apache.lucene.util.PriorityQueue), DocMap (org.apache.lucene.index.MergeState.DocMap), Bits (org.apache.lucene.util.Bits)
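
The per-reader PackedLongValues.Builder records, for each original docID in order, that document's position in the merged segment; since these values never decrease, the monotonic builder can store them very compactly. A tiny standalone illustration of that API follows, with made-up values (doc 2 plays the role of a deleted document); it is a sketch of the storage idea, not code from MultiSorter.

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

class DocMapSketch {
    public static void main(String[] args) {
        // Mapped docIDs never decrease, so the monotonic builder stores them compactly.
        PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
        // Original docs 0..4, where doc 2 is deleted: it still gets an entry (the mapped
        // counter is not incremented for it), and DocMap.get answers -1 for it later
        // via the liveDocs check, so its stored value is never consulted.
        for (long mappedDocID : new long[] { 0, 1, 2, 2, 3 }) {
            builder.add(mappedDocID);
        }
        PackedLongValues remapped = builder.build();
        System.out.println(remapped.get(4)); // prints 3: original doc 4 lands at merged position 3
    }
}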

Example 9 with PriorityQueue

Use of org.apache.lucene.util.PriorityQueue in the lucene-solr project by apache.

From the class CommonTermsQueryTest, the method testRandomIndex:

public void testRandomIndex() throws IOException {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
    createRandomIndex(atLeast(50), w, random().nextLong());
    w.forceMerge(1);
    DirectoryReader reader = w.getReader();
    LeafReader wrapper = getOnlyLeafReader(reader);
    String field = "body";
    Terms terms = wrapper.terms(field);
    PriorityQueue<TermAndFreq> lowFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {

        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq > b.freq;
        }
    };
    PriorityQueue<TermAndFreq> highFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {

        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq < b.freq;
        }
    };
    try {
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            if (highFreqQueue.size() < 5) {
                highFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
                lowFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
            } else {
                if (highFreqQueue.top().freq < iterator.docFreq()) {
                    highFreqQueue.top().freq = iterator.docFreq();
                    highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    highFreqQueue.updateTop();
                }
                if (lowFreqQueue.top().freq > iterator.docFreq()) {
                    lowFreqQueue.top().freq = iterator.docFreq();
                    lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    lowFreqQueue.updateTop();
                }
            }
        }
        int lowFreq = lowFreqQueue.top().freq;
        int highFreq = highFreqQueue.top().freq;
        assumeTrue("unlucky index", highFreq - 1 > lowFreq);
        List<TermAndFreq> highTerms = queueToList(highFreqQueue);
        List<TermAndFreq> lowTerms = queueToList(lowFreqQueue);
        IndexSearcher searcher = newSearcher(reader);
        Occur lowFreqOccur = randomOccur(random());
        BooleanQuery.Builder verifyQuery = new BooleanQuery.Builder();
        CommonTermsQuery cq = new CommonTermsQuery(randomOccur(random()), lowFreqOccur, highFreq - 1);
        for (TermAndFreq termAndFreq : lowTerms) {
            cq.add(new Term(field, termAndFreq.term));
            verifyQuery.add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
        }
        for (TermAndFreq termAndFreq : highTerms) {
            cq.add(new Term(field, termAndFreq.term));
        }
        TopDocs cqSearch = searcher.search(cq, reader.maxDoc());
        TopDocs verifySearch = searcher.search(verifyQuery.build(), reader.maxDoc());
        assertEquals(verifySearch.totalHits, cqSearch.totalHits);
        Set<Integer> hits = new HashSet<>();
        for (ScoreDoc doc : verifySearch.scoreDocs) {
            hits.add(doc.doc);
        }
        for (ScoreDoc doc : cqSearch.scoreDocs) {
            assertTrue(hits.remove(doc.doc));
        }
        assertTrue(hits.isEmpty());
        /*
         * We need to force merge here since QueryUtils adds checks based
         * on leaf readers, which have different statistics than the top-level
         * reader if we have more than one segment. This could result in a
         * different query and different results.
         */
        w.forceMerge(1);
        DirectoryReader reader2 = w.getReader();
        QueryUtils.check(random(), cq, newSearcher(reader2));
        reader2.close();
    } finally {
        IOUtils.close(reader, w, dir, analyzer);
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), BooleanQuery (org.apache.lucene.search.BooleanQuery), Occur (org.apache.lucene.search.BooleanClause.Occur), TermsEnum (org.apache.lucene.index.TermsEnum), ScoreDoc (org.apache.lucene.search.ScoreDoc), TopDocs (org.apache.lucene.search.TopDocs), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), Directory (org.apache.lucene.store.Directory), HashSet (java.util.HashSet), TermQuery (org.apache.lucene.search.TermQuery), LeafReader (org.apache.lucene.index.LeafReader), DirectoryReader (org.apache.lucene.index.DirectoryReader), Terms (org.apache.lucene.index.Terms), Term (org.apache.lucene.index.Term), PriorityQueue (org.apache.lucene.util.PriorityQueue), BooleanClause (org.apache.lucene.search.BooleanClause), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)
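
The queueToList helper called in the test is not part of the excerpt; a plausible sketch, assuming it simply drains the queue by popping, is:

// A plausible sketch of the queueToList helper (not shown in the excerpt):
// drain the queue, collecting entries in pop order (weakest first).
private static List<TermAndFreq> queueToList(PriorityQueue<TermAndFreq> queue) {
    List<TermAndFreq> terms = new ArrayList<>();
    while (queue.size() > 0) {
        terms.add(queue.pop());
    }
    return terms;
}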

Aggregations

PriorityQueue (org.apache.lucene.util.PriorityQueue): 9 usages
ArrayList (java.util.ArrayList): 5 usages
InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation): 2 usages
HashSet (java.util.HashSet): 1 usage
LinkedList (java.util.LinkedList): 1 usage
Callable (java.util.concurrent.Callable): 1 usage
ExecutionException (java.util.concurrent.ExecutionException): 1 usage
ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService): 1 usage
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1 usage
DirectoryReader (org.apache.lucene.index.DirectoryReader): 1 usage
LeafReader (org.apache.lucene.index.LeafReader): 1 usage
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1 usage
DocMap (org.apache.lucene.index.MergeState.DocMap): 1 usage
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 1 usage
Term (org.apache.lucene.index.Term): 1 usage
Terms (org.apache.lucene.index.Terms): 1 usage
TermsEnum (org.apache.lucene.index.TermsEnum): 1 usage
BooleanClause (org.apache.lucene.search.BooleanClause): 1 usage
Occur (org.apache.lucene.search.BooleanClause.Occur): 1 usage
BooleanQuery (org.apache.lucene.search.BooleanQuery): 1 usage