Search in sources :

Example 1 with BitIntsHolder

use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomSloppyPhraseScorer method phraseFreq.

/**
 * Counts the slop-valid position combinations (matches) of the phrase in the
 * current candidate document, encountered while traversing/hopping the
 * PhrasePositions, and records positions for the matches that were found.
 * <br> In the original scoring scheme the contribution of a match depends on the distance:
 * <br> - highest score for distance=0 (exact match).
 * <br> - score gets lower as distance gets higher.
 * <br>NOTE(review): this method returns the plain number of matches; no
 * distance-based weighting is visible here – confirm where (if anywhere) it is applied.
 * <br>Example: for query "a b"~2, a document "x a b a y" can be matched twice:
 * once for "a b" (distance=0), and once for "b a" (distance=2).
 * <br>Possibly not all valid combinations are encountered, because for efficiency
 * we always propagate the least PhrasePosition. This makes it possible to rely on a
 * PriorityQueue and move forward faster.
 * As a result, for example, document "a b c b a"
 * would score differently for queries "a b c"~4 and "c b a"~4, although
 * they really are equivalent.
 * Similarly, for doc "a b c b a f g", query "c b"~2
 * would get the same score as "g f"~2, although "c b"~2 could be matched twice.
 * We may want to fix this in the future (currently not, for performance reasons).
 * <br>Custom addition: when {@code positions} ends up non-empty it is stored in
 * {@code documentsToPositionsMap} under the key {@code docID()}.
 *
 * @return the number of matches counted for the current document
 *         ({@code 0.0f} when {@code initPhrasePositions()} returns {@code false})
 * @throws IOException declared by this method; presumably raised while advancing positions
 */
private float phraseFreq() throws IOException {
    // custom begins
    // allPositions: every (position + offset) visited while scanning this document
    BitIntsHolder allPositions = new BitIntsHolder();
    // positions: filled only through addPositions(...) – presumably the positions that
    // belong to an actual match; this is what gets stored for the document
    BitIntsHolder positions = new BitIntsHolder();
    if (phrasePositions.length == 1) {
        // special handling for one term
        end = Integer.MIN_VALUE;
        PhrasePositions pp = phrasePositions[0];
        pp.firstPosition();
        if (pp.position > end) {
            end = pp.position;
        }
        int matchCount = 0;
        // every successful advance is counted as one match with match length 0
        // NOTE(review): positions are recorded only after advancePP(pp) succeeds; whether
        // the position set by firstPosition() is itself recorded depends on advancePP – confirm
        while (advancePP(pp)) {
            allPositions.set(pp.position + pp.offset);
            addPositions(positions, allPositions, pp.position + pp.offset, 0);
            matchCount++;
        }
        if (!positions.isEmpty()) {
            documentsToPositionsMap.put(docID(), positions);
        }
        return matchCount;
    }
    if (!initPhrasePositions()) {
        return 0.0f;
    }
    // custom begins
    // seed allPositions with the current position of every entry in the queue
    // (the loop variable shadows the phrasePositions field inside this loop)
    for (PhrasePositions phrasePositions : this.pq) {
        allPositions.set(phrasePositions.position + phrasePositions.offset);
    }
    // custom ends
    int numMatches = 0;
    // pp is the entry popped from the queue; matchLength spans from it to the current end
    PhrasePositions pp = pq.pop();
    int matchLength = end - pp.position;
    // position of the entry now on top of the queue; pp may advance up to it
    int next = pq.top().position;
    // custom – remember last matched position
    int lastEnd = this.end;
    while (advancePP(pp)) {
        if (hasRpts && !advanceRpts(pp)) {
            // pps exhausted
            break;
        }
        allPositions.set(pp.position + pp.offset);
        if (pp.position > next) {
            // done minimizing current match-length
            if (matchLength <= slop) {
                numMatches++;
                // custom – match found, remember positions
                addPositions(positions, allPositions, lastEnd, matchLength);
            }
            // pp overtook the queue top: put it back and continue with the new least entry
            pq.add(pp);
            pp = pq.pop();
            next = pq.top().position;
            matchLength = end - pp.position;
            // custom – remember position of last match
            lastEnd = this.end;
        } else {
            // pp is still not past the queue top: keep the shortest match length seen so far
            int matchLength2 = end - pp.position;
            if (matchLength2 < matchLength) {
                matchLength = matchLength2;
            }
            // custom – remember position of last match
            lastEnd = this.end;
        }
    }
    // the candidate that was being minimized when the loop ended still has to be checked
    if (matchLength <= slop) {
        numMatches++;
        // custom – match found, remember positions
        addPositions(positions, allPositions, lastEnd, matchLength);
    }
    // custom begins – if some positions were found then store them
    if (!positions.isEmpty()) {
        documentsToPositionsMap.put(docID(), positions);
    }
    // custom ends
    return numMatches;
}
Also used : BitIntsHolder(org.opengrok.suggest.query.data.BitIntsHolder)

Example 2 with BitIntsHolder

use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.

the class SuggesterSearcher method getComplexQueryData.

/**
 * Runs {@code query} and gathers the data needed for complex suggester queries:
 * the ids of matching documents in the given leaf and, if one can be found,
 * the {@link PhraseScorer} used for that leaf.
 * <p>
 * Only hits whose leaf context is the very same instance as
 * {@code leafReaderContext} are recorded; ids are stored as {@code docBase + doc}.
 *
 * @param query query to run; {@code null} or a {@link SuggesterQuery} yields a result
 *              whose {@code documentIds} is a {@code BitIntsHolder} created with size 0
 * @param leafReaderContext the leaf for which document ids and the scorer are collected
 * @return the collected data, or {@code null} if the search failed with an
 *         {@link IOException} while the current thread was interrupted
 *         (the {@code interrupted} flag is set in that case)
 */
private ComplexQueryData getComplexQueryData(final Query query, final LeafReaderContext leafReaderContext) {
    ComplexQueryData data = new ComplexQueryData();
    if (query == null || query instanceof SuggesterQuery) {
        data.documentIds = new BitIntsHolder(0);
        return data;
    }
    BitIntsHolder documentIds = new BitIntsHolder();
    try {
        search(query, new Collector() {

            @Override
            public LeafCollector getLeafCollector(final LeafReaderContext context) {
                return new LeafCollector() {

                    final int docBase = context.docBase;

                    @Override
                    public void setScorer(final Scorable scorer) {
                        if (leafReaderContext == context) {
                            if (scorer instanceof PhraseScorer) {
                                data.scorer = (PhraseScorer) scorer;
                            } else {
                                try {
                                    // in #setScorer but no better way was found
                                    // the last PhraseScorer child wins if there are several
                                    for (Scorer.ChildScorable childScorer : scorer.getChildren()) {
                                        if (childScorer.child instanceof PhraseScorer) {
                                            data.scorer = (PhraseScorer) childScorer.child;
                                        }
                                    }
                                } catch (Exception e) {
                                    // Best effort: a missing phrase scorer must not abort the search,
                                    // but leave a trace instead of hiding the failure completely.
                                    logger.log(Level.FINE, "Could not inspect child scorers for " + query, e);
                                }
                            }
                        }
                    }

                    @Override
                    public void collect(int doc) {
                        if (leafReaderContext == context) {
                            documentIds.set(docBase + doc);
                        }
                    }
                };
            }

            @Override
            public ScoreMode scoreMode() {
                return ScoreMode.COMPLETE_NO_SCORES;
            }
        });
    } catch (IOException e) {
        if (Thread.currentThread().isInterrupted()) {
            // the search was cut short by an interrupt: flag it and give up on this query
            interrupted = true;
            return null;
        } else {
            logger.log(Level.WARNING, "Could not get document ids for " + query, e);
        }
    } catch (Exception e) {
        logger.log(Level.WARNING, "Could not get document ids for " + query, e);
    }
    // after a logged failure whatever was collected so far is still returned
    data.documentIds = documentIds;
    return data;
}
Also used : ScoreMode(org.apache.lucene.search.ScoreMode) BitIntsHolder(org.opengrok.suggest.query.data.BitIntsHolder) Scorable(org.apache.lucene.search.Scorable) SuggesterQuery(org.opengrok.suggest.query.SuggesterQuery) IOException(java.io.IOException) PhraseScorer(org.opengrok.suggest.query.PhraseScorer) IOException(java.io.IOException) LeafCollector(org.apache.lucene.search.LeafCollector) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 3 with BitIntsHolder

use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomExactPhraseScorer method phraseFreq.

/**
 * Counts the exact phrase occurrences in the current document and records
 * where they were found.
 * <p>
 * The first posting acts as the lead; every other posting has to be found at
 * the position implied by the lead position and the posting offsets. For each
 * occurrence {@code phrasePos + offset} is set in a {@link BitIntsHolder},
 * which is stored in {@code documentToPositionsMap} under {@code docID()}
 * when at least one occurrence was found.
 *
 * @return the number of phrase occurrences found
 * @throws IOException declared by this method; presumably raised while reading positions
 */
private int phraseFreq() throws IOException {
    // reset state
    final PostingsAndPosition[] postings = this.postings;
    for (PostingsAndPosition posting : postings) {
        posting.freq = posting.postings.freq();
        posting.pos = posting.postings.nextPosition();
        posting.upTo = 1;
    }
    int freq = 0;
    final PostingsAndPosition lead = postings[0];
    // custom – store positions
    // created lazily so that nothing is allocated or stored when there is no match
    BitIntsHolder positions = null;
    advanceHead: while (true) {
        // position of the phrase start implied by the lead's current position
        final int phrasePos = lead.pos - lead.offset;
        for (int j = 1; j < postings.length; ++j) {
            final PostingsAndPosition posting = postings[j];
            final int expectedPos = phrasePos + posting.offset;
            // advance up to the same position as the lead
            if (!advancePosition(posting, expectedPos)) {
                // presumably this posting has no position left at or after expectedPos
                break advanceHead;
            }
            if (posting.pos != expectedPos) {
                // we advanced too far
                // move the lead to the position that would line up with this posting and retry
                if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
                    continue advanceHead;
                } else {
                    break advanceHead;
                }
            }
        }
        // every posting was found at its expected position
        freq += 1;
        // custom begins – found a match
        if (positions == null) {
            positions = new BitIntsHolder();
        }
        positions.set(phrasePos + offset);
        // stop once the lead has consumed all of its positions in this document
        if (lead.upTo == lead.freq) {
            break;
        }
        lead.pos = lead.postings.nextPosition();
        lead.upTo += 1;
    }
    // custom begins – if some positions were found then store them
    if (positions != null) {
        documentToPositionsMap.put(docID(), positions);
    }
    return freq;
}
Also used : BitIntsHolder(org.opengrok.suggest.query.data.BitIntsHolder)

Example 4 with BitIntsHolder

use of org.opengrok.suggest.query.data.BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomSloppyPhraseScorerTest method test.

/**
 * Indexes a single fixed document ("zero one two ... ten"), runs a
 * {@code CustomPhraseQuery} built from the given terms against it and checks
 * the positions reported by the resulting {@link PhraseScorer}.
 *
 * @param slop slop passed to the phrase query
 * @param offset offset set on the query via {@code setOffset}
 * @param terms terms of the phrase
 * @param expectedPositions positions expected to be reported for the matching document
 * @throws IOException if indexing, searching or closing the index fails
 */
public static void test(final int slop, final int offset, final String[] terms, final Integer[] expectedPositions) throws IOException {
    // the directory is a resource as well – close it together with the writer and the reader
    try (Directory dir = new ByteBuffersDirectory()) {
        try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
            Document doc = new Document();
            doc.add(new TextField("test", "zero one two three four five six seven eight nine ten", Field.Store.NO));
            iw.addDocument(doc);
        }
        CustomPhraseQuery query = new CustomPhraseQuery(slop, "test", terms);
        query.setOffset(offset);
        try (IndexReader ir = DirectoryReader.open(dir)) {
            IndexSearcher is = new IndexSearcher(ir);
            Weight w = query.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1);
            // only one document was indexed, so the first leaf is the one that holds it
            LeafReaderContext context = ir.getContext().leaves().get(0);
            Scorer scorer = w.scorer(context);
            TwoPhaseIterator it = scorer.twoPhaseIterator();
            // remember the last approximated document that was confirmed as a real match
            int correctDoc = -1;
            int docId;
            while ((docId = it.approximation().nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (it.matches()) {
                    correctDoc = docId;
                }
            }
            BitIntsHolder bs = (BitIntsHolder) ((PhraseScorer) scorer).getPositions(correctDoc);
            assertThat(toSet(bs), contains(expectedPositions));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BitIntsHolder(org.opengrok.suggest.query.data.BitIntsHolder) TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) Scorer(org.apache.lucene.search.Scorer) PhraseScorer(org.opengrok.suggest.query.PhraseScorer) Document(org.apache.lucene.document.Document) Weight(org.apache.lucene.search.Weight) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

BitIntsHolder (org.opengrok.suggest.query.data.BitIntsHolder)4 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)2 PhraseScorer (org.opengrok.suggest.query.PhraseScorer)2 IOException (java.io.IOException)1 Document (org.apache.lucene.document.Document)1 TextField (org.apache.lucene.document.TextField)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 Collector (org.apache.lucene.search.Collector)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 LeafCollector (org.apache.lucene.search.LeafCollector)1 Scorable (org.apache.lucene.search.Scorable)1 ScoreMode (org.apache.lucene.search.ScoreMode)1 Scorer (org.apache.lucene.search.Scorer)1 TwoPhaseIterator (org.apache.lucene.search.TwoPhaseIterator)1 Weight (org.apache.lucene.search.Weight)1 ByteBuffersDirectory (org.apache.lucene.store.ByteBuffersDirectory)1 Directory (org.apache.lucene.store.Directory)1 SuggesterQuery (org.opengrok.suggest.query.SuggesterQuery)1