Example 21 with PriorityQueue

use of java.util.PriorityQueue in project lucene-solr by apache.

the class NearestNeighbor method nearest.

// TODO: can we somehow share more with, or simply directly use, the LatLonPointDistanceComparator?  It's really doing the same thing as
// our hitQueue...
public static NearestHit[] nearest(double pointLat, double pointLon, List<BKDReader> readers, List<Bits> liveDocs, List<Integer> docBases, final int n) throws IOException {
    //System.out.println("NEAREST: readers=" + readers + " liveDocs=" + liveDocs + " pointLat=" + pointLat + " pointLon=" + pointLon);
    // Holds closest collected points seen so far:
    // TODO: if we used lucene's PQ we could just updateTop instead of poll/offer:
    final PriorityQueue<NearestHit> hitQueue = new PriorityQueue<>(n, new Comparator<NearestHit>() {

        public int compare(NearestHit a, NearestHit b) {
            // sort by opposite distanceMeters natural order
            int cmp =, b.distanceMeters);
            if (cmp != 0) {
                return -cmp;
            // tie-break by higher docID:
            return b.docID - a.docID;
    // Holds all cells, sorted by closest to the point:
    PriorityQueue<Cell> cellQueue = new PriorityQueue<>();
    NearestVisitor visitor = new NearestVisitor(hitQueue, n, pointLat, pointLon);
    List<BKDReader.IntersectState> states = new ArrayList<>();
    // Add root cell for each reader into the queue:
    int bytesPerDim = -1;
    for (int i = 0; i < readers.size(); i++) {
        BKDReader reader = readers.get(i);
        if (bytesPerDim == -1) {
            bytesPerDim = reader.getBytesPerDimension();
        } else if (bytesPerDim != reader.getBytesPerDimension()) {
            throw new IllegalStateException("bytesPerDim changed from " + bytesPerDim + " to " + reader.getBytesPerDimension() + " across readers");
        byte[] minPackedValue = reader.getMinPackedValue();
        byte[] maxPackedValue = reader.getMaxPackedValue();
        IntersectState state = reader.getIntersectState(visitor);
        cellQueue.offer(new Cell(state.index, i, reader.getMinPackedValue(), reader.getMaxPackedValue(), approxBestDistance(minPackedValue, maxPackedValue, pointLat, pointLon)));
    while (cellQueue.size() > 0) {
        Cell cell = cellQueue.poll();
        //System.out.println("  visit " + cell);
        // TODO: if we replace approxBestDistance with actualBestDistance, we can put an opto here to break once this "best" cell is fully outside of the hitQueue bottom's radius:
        BKDReader reader = readers.get(cell.readerIndex);
        if (cell.index.isLeafNode()) {
            //System.out.println("    leaf");
            // Leaf block: visit all points and possibly collect them:
            visitor.curDocBase = docBases.get(cell.readerIndex);
            visitor.curLiveDocs = liveDocs.get(cell.readerIndex);
            reader.visitLeafBlockValues(cell.index, states.get(cell.readerIndex));
        //System.out.println("    now " + hitQueue.size() + " hits");
        } else {
            //System.out.println("    non-leaf");
            // Non-leaf block: split into two cells and put them back into the queue:
            double cellMinLat = decodeLatitude(cell.minPacked, 0);
            double cellMinLon = decodeLongitude(cell.minPacked, Integer.BYTES);
            double cellMaxLat = decodeLatitude(cell.maxPacked, 0);
            double cellMaxLon = decodeLongitude(cell.maxPacked, Integer.BYTES);
            if (cellMaxLat < visitor.minLat || visitor.maxLat < cellMinLat || ((cellMaxLon < visitor.minLon || visitor.maxLon < cellMinLon) && cellMaxLon < visitor.minLon2)) {
                // this cell is outside our search bbox; don't bother exploring any more
            BytesRef splitValue = BytesRef.deepCopyOf(cell.index.getSplitDimValue());
            int splitDim = cell.index.getSplitDim();
            // we must clone the index so that we we can recurse left and right "concurrently":
            IndexTree newIndex = cell.index.clone();
            byte[] splitPackedValue = cell.maxPacked.clone();
            System.arraycopy(splitValue.bytes, splitValue.offset, splitPackedValue, splitDim * bytesPerDim, bytesPerDim);
            cellQueue.offer(new Cell(cell.index, cell.readerIndex, cell.minPacked, splitPackedValue, approxBestDistance(cell.minPacked, splitPackedValue, pointLat, pointLon)));
            splitPackedValue = cell.minPacked.clone();
            System.arraycopy(splitValue.bytes, splitValue.offset, splitPackedValue, splitDim * bytesPerDim, bytesPerDim);
            cellQueue.offer(new Cell(newIndex, cell.readerIndex, splitPackedValue, cell.maxPacked, approxBestDistance(splitPackedValue, cell.maxPacked, pointLat, pointLon)));
    NearestHit[] hits = new NearestHit[hitQueue.size()];
    int downTo = hitQueue.size() - 1;
    while (hitQueue.size() != 0) {
        hits[downTo] = hitQueue.poll();
    return hits;
Example 22 with PriorityQueue

use of java.util.PriorityQueue in project lucene-solr by apache.

the class WordBreakSpellChecker method suggestWordCombinations.

  /**
   * <p>
   * Generate suggestions by combining one or more of the passed-in terms into
   * single words. The returned {@link CombineSuggestion} contains both a
   * {@link SuggestWord} and also an array detailing which passed-in terms were
   * involved in creating this combination. The scores returned are equal to the
   * number of word combinations needed, also one less than the length of the
   * array {@link CombineSuggestion#originalTermIndexes}. Generally, a
   * suggestion with a lower score is preferred over a higher score.
   * </p>
   * <p>
   * To prevent two adjacent terms from being combined (for instance, if one is
   * mandatory and the other is prohibited), separate the two terms with
   * {@link WordBreakSpellChecker#SEPARATOR_TERM}
   * </p>
   * <p>
   * When suggestMode equals {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX}, each
   * suggestion will include at least one term not in the index.
   * </p>
   * <p>
   * When suggestMode equals {@link SuggestMode#SUGGEST_MORE_POPULAR}, each
   * suggestion will have the same, or better frequency than the most-popular
   * included term.
   * </p>
   * @return an array of words generated by combining original terms
   * @throws IOException If there is a low-level I/O error.
   */
public CombineSuggestion[] suggestWordCombinations(Term[] terms, int maxSuggestions, IndexReader ir, SuggestMode suggestMode) throws IOException {
    if (maxSuggestions < 1) {
        return new CombineSuggestion[0];
    int[] origFreqs = null;
    if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
        origFreqs = new int[terms.length];
        for (int i = 0; i < terms.length; i++) {
            origFreqs[i] = ir.docFreq(terms[i]);
    int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
    Comparator<CombineSuggestionWrapper> queueComparator = new CombinationsThenFreqComparator();
    Queue<CombineSuggestionWrapper> suggestions = new PriorityQueue<>(queueInitialCapacity, queueComparator);
    int thisTimeEvaluations = 0;
    for (int i = 0; i < terms.length - 1; i++) {
        if (terms[i].equals(SEPARATOR_TERM)) {
        String leftTermText = terms[i].text();
        int leftTermLength = leftTermText.codePointCount(0, leftTermText.length());
        if (leftTermLength > maxCombineWordLength) {
        int maxFreq = 0;
        int minFreq = Integer.MAX_VALUE;
        if (origFreqs != null) {
            maxFreq = origFreqs[i];
            minFreq = origFreqs[i];
        String combinedTermText = leftTermText;
        int combinedLength = leftTermLength;
        for (int j = i + 1; j < terms.length && j - i <= maxChanges; j++) {
            if (terms[j].equals(SEPARATOR_TERM)) {
            String rightTermText = terms[j].text();
            int rightTermLength = rightTermText.codePointCount(0, rightTermText.length());
            combinedTermText += rightTermText;
            combinedLength += rightTermLength;
            if (combinedLength > maxCombineWordLength) {
            if (origFreqs != null) {
                maxFreq = Math.max(maxFreq, origFreqs[j]);
                minFreq = Math.min(minFreq, origFreqs[j]);
            Term combinedTerm = new Term(terms[0].field(), combinedTermText);
            int combinedTermFreq = ir.docFreq(combinedTerm);
            if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR || combinedTermFreq >= maxFreq) {
                if (suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX || minFreq == 0) {
                    if (combinedTermFreq >= minSuggestionFrequency) {
                        int[] origIndexes = new int[j - i + 1];
                        origIndexes[0] = i;
                        for (int k = 1; k < origIndexes.length; k++) {
                            origIndexes[k] = i + k;
                        SuggestWord word = new SuggestWord();
                        word.freq = combinedTermFreq;
                        word.score = origIndexes.length - 1;
                        word.string = combinedTerm.text();
                        CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(new CombineSuggestion(word, origIndexes), (origIndexes.length - 1));
                        if (suggestions.size() > maxSuggestions) {
            if (thisTimeEvaluations == maxEvaluations) {
    CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions.size()];
    for (int i = suggestions.size() - 1; i >= 0; i--) {
        combineSuggestions[i] = suggestions.remove().combineSuggestion;
    return combineSuggestions;
Example 23 with PriorityQueue

use of java.util.PriorityQueue in project lucene-solr by apache.

the class DirectSpellChecker method suggestSimilar.

  /**
   * Provide spelling corrections based on several parameters.
   * @param term The term to suggest spelling corrections for
   * @param numSug The maximum number of spelling corrections
   * @param ir The index reader to fetch the candidate spelling corrections from
   * @param docfreq The minimum document frequency a potential suggestion needs to have in order to be included
   * @param editDistance The maximum edit distance candidates are allowed to have
   * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
   * @param spare a chars scratch
   * @return a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order.
   * @throws IOException If I/O related errors occur
   */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance, float accuracy, final CharsRefBuilder spare) throws IOException {
    AttributeSource atts = new AttributeSource();
    MaxNonCompetitiveBoostAttribute maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    Terms terms = MultiFields.getTerms(ir, term.field());
    if (terms == null) {
        return Collections.emptyList();
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
    BytesRef queryTerm = new BytesRef(term.text());
    BytesRef candidateTerm;
    ScoreTerm st = new ScoreTerm();
    BoostAttribute boostAtt = e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = != null) {
        // For FuzzyQuery, boost is the score:
        float score = boostAtt.getBoost();
        // ignore uncompetitive hits
        if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
        // ignore exact match of the same term
        if (queryTerm.bytesEquals(candidateTerm)) {
        int df = e.docFreq();
        // check docFreq if required
        if (df <= docfreq) {
        final String termAsString;
        if (distance == INTERNAL_LEVENSHTEIN) {
            // delay creating strings until the end
            termAsString = null;
        } else {
            termAsString = spare.toString();
            score = distance.getDistance(term.text(), termAsString);
        if (score < accuracy) {
        // add new entry in PQ
        st.term = BytesRef.deepCopyOf(candidateTerm);
        st.boost = score;
        st.docfreq = df;
        st.termAsString = termAsString;
        st.score = score;
        // possibly drop entries from queue
        st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
        maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
    return stQueue;
Example 24 with PriorityQueue

use of java.util.PriorityQueue in project lucene-solr by apache.

the class FieldHighlighter method highlightOffsetsEnums.

// algorithm: treat sentence snippets as miniature documents
// we can intersect these with the postings lists via BreakIterator.preceding(offset),s
// score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
    PassageScorer scorer = passageScorer;
    BreakIterator breakIterator = this.breakIterator;
    final int contentLength = breakIterator.getText().getEndIndex();
    PriorityQueue<OffsetsEnum> offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1);
    for (OffsetsEnum off : offsetsEnums) {
        off.setWeight(scorer.weight(contentLength, off.freq()));
        // go to first position
    // a sentinel for termination
    offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY));
    PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
        if (left.getScore() < right.getScore()) {
            return -1;
        } else if (left.getScore() > right.getScore()) {
            return 1;
        } else {
            return left.getStartOffset() - right.getStartOffset();
    // the current passage in-progress.  Will either get reset or added to queue.
    Passage passage = new Passage();
    OffsetsEnum off;
    while ((off = offsetsEnumQueue.poll()) != null) {
        int start = off.startOffset();
        if (start == -1) {
            throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
        int end = off.endOffset();
        // saw this term, it won't cause a passage to be added to passageQueue or anything.
        assert EMPTY.startOffset() == Integer.MAX_VALUE;
        if (start < contentLength && end > contentLength) {
        // See if this term should be part of a new passage.
        if (start >= passage.getEndOffset()) {
            if (passage.getStartOffset() >= 0) {
                // true if this passage has terms; otherwise couldn't find any (yet)
                // finalize passage
                passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
                // new sentence: first add 'passage' to queue
                if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
                    // can't compete, just reset it
                } else {
                    if (passageQueue.size() > maxPassages) {
                        passage = passageQueue.poll();
                    } else {
                        passage = new Passage();
            // if we exceed limit, we are done
            if (start >= contentLength) {
            // advance breakIterator
            passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
            passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
        // Add this term to the passage.
        int tf = 0;
        while (true) {
            // a reference; safe to refer to
            BytesRef term = off.getTerm();
            assert term != null;
            passage.addMatch(start, end, term);
            // see if there are multiple occurrences of this term in this passage. If so, add them.
            if (!off.hasMorePositions()) {
                // No more in the entire text. Already removed from pq; move on
            start = off.startOffset();
            end = off.endOffset();
            if (start >= passage.getEndOffset() || end > contentLength) {
                // it's beyond this passage
        passage.setScore(passage.getScore() + off.getWeight() *, passage.getEndOffset() - passage.getStartOffset()));
    Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
    for (Passage p : passages) {
    // sort in ascending order
    Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
    return passages;
Example 25 with PriorityQueue

use of java.util.PriorityQueue in project intellij-community by JetBrains.

the class LinearBekGraphBuilder method getFragment.

private MergeFragment getFragment(int leftChild, int rightChild, int parent) {
    MergeFragment fragment = new MergeFragment(parent, leftChild, rightChild);
    int leftLi = myGraphLayout.getLayoutIndex(leftChild);
    int rightLi = myGraphLayout.getLayoutIndex(rightChild);
    int rowsCount = 1;
    int blockSize = 1;
    PriorityQueue<GraphEdge> queue = new PriorityQueue<>(MAX_BLOCK_SIZE, new GraphEdgeComparator());
    queue.addAll(myLinearBekGraph.getAdjacentEdges(rightChild, EdgeFilter.NORMAL_DOWN));
    @Nullable Set<Integer> magicSet = null;
    while (!queue.isEmpty()) {
        GraphEdge nextEdge = queue.poll();
        Integer next = nextEdge.getDownNodeIndex();
        Integer upNodeIndex = nextEdge.getUpNodeIndex();
        // can not happen
        assert upNodeIndex != null;
        if (next == null) {
            // allow very long edges down
        if (next == leftChild) {
            // found first child
        } else if (next == rightChild + rowsCount) {
            // all is fine, continuing
            queue.addAll(myLinearBekGraph.getAdjacentEdges(next, EdgeFilter.NORMAL_DOWN));
        } else if (next > rightChild + rowsCount && next < leftChild) {
            rowsCount = next - rightChild + 1;
            queue.addAll(myLinearBekGraph.getAdjacentEdges(next, EdgeFilter.NORMAL_DOWN));
        } else if (next > leftChild) {
            int li = myGraphLayout.getLayoutIndex(next);
            if (leftLi > rightLi && !fragment.isMergeWithOldCommit()) {
                if (next > leftChild + MAGIC_SET_SIZE) {
                    return null;
                if (magicSet == null) {
                    magicSet = calculateMagicSet(leftChild);
                if (magicSet.contains(next)) {
                    fragment.addTailEdge(upNodeIndex, next);
                } else {
                    return null;
            } else {
                if ((li > leftLi && li < rightLi) || (li == leftLi)) {
                    fragment.addTailEdge(upNodeIndex, next);
                } else {
                    if (li >= rightLi) {
                        return null;
                    } else {
                        if (next > leftChild + MAGIC_SET_SIZE) {
                            if (!fragment.hasTailEdge(upNodeIndex) && !fragment.isBody(upNodeIndex))
                                return null;
                        } else {
                            if (magicSet == null) {
                                magicSet = calculateMagicSet(leftChild);
                            if (magicSet.contains(next)) {
                                fragment.addTailEdge(upNodeIndex, next);
                            } else {
                                return null;
        if (blockSize >= MAX_BLOCK_SIZE) {
            return null;
    if (fragment.getTails().isEmpty()) {
        // this can happen if we ran into initial import
        return null;
    return fragment;
Also used : PriorityQueue(java.util.PriorityQueue) GraphEdge(com.intellij.vcs.log.graph.api.elements.GraphEdge) Nullable(org.jetbrains.annotations.Nullable) Nullable(org.jetbrains.annotations.Nullable)


