Search in sources :

Example 1 with BitIntsHolder

Use of BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomSloppyPhraseScorer method phraseFreq.

/**
 * Score a candidate doc for all slop-valid position-combinations (matches)
 * encountered while traversing/hopping the PhrasePositions.
 * <br> The score contribution of a match depends on the distance:
 * <br> - highest score for distance=0 (exact match).
 * <br> - score gets lower as distance gets higher.
 * <br>Example: for query "a b"~2, a document "x a b a y" can be scored twice:
 * once for "a b" (distance=0), and once for "b a" (distance=2).
 * <br>Possibly not all valid combinations are encountered, because for efficiency
 * we always propagate the least PhrasePosition. This allows us to rely on a
 * PriorityQueue and move forward faster.
 * As a result, for example, document "a b c b a"
 * would score differently for queries "a b c"~4 and "c b a"~4, although
 * they really are equivalent.
 * Similarly, for doc "a b c b a f g", query "c b"~2
 * would get same score as "g f"~2, although "c b"~2 could be matched twice.
 * We may want to fix this in the future (currently not, for performance reasons).
 */
// Walks the phrase positions of the current document, marks every visited
// position in allPositions, passes match candidates to addPositions, and stores
// the resulting holder in documentsToPositionsMap under docID().
// NOTE(review): this listing has lost its closing braces and apparently some
// statements; the block boundaries below are not recoverable from the text alone,
// so verify against the upstream source before relying on the control flow.
private float phraseFreq() throws IOException {
    // custom begins
    // Every position (pp.position + pp.offset) visited during this call.
    BitIntsHolder allPositions = new BitIntsHolder();
    // Holder handed to addPositions and stored for this document when non-empty.
    BitIntsHolder positions = new BitIntsHolder();
    if (phrasePositions.length == 1) {
        // special handling for one term
        // Reset end so the comparison below always picks up the term's position.
        end = Integer.MIN_VALUE;
        PhrasePositions pp = phrasePositions[0];
        if (pp.position > end) {
            end = pp.position;
        // NOTE(review): matchCount is never incremented in this listing, so the
        // branch returns 0 as shown — an increment may have been dropped.
        int matchCount = 0;
        while (advancePP(pp)) {
            allPositions.set(pp.position + pp.offset);
            // A single term is passed with a match length of 0.
            addPositions(positions, allPositions, pp.position + pp.offset, 0);
        if (!positions.isEmpty()) {
            documentsToPositionsMap.put(docID(), positions);
        return matchCount;
    // Nothing to score when the phrase positions cannot be initialised.
    if (!initPhrasePositions()) {
        return 0.0f;
    // custom begins
    // Record the starting position of every entry currently in the queue.
    for (PhrasePositions phrasePositions : this.pq) {
        allPositions.set(phrasePositions.position + phrasePositions.offset);
    // custom ends
    // NOTE(review): numMatches is never incremented in this listing either.
    int numMatches = 0;
    PhrasePositions pp = pq.pop();
    int matchLength = end - pp.position;
    // NOTE(review): the right-hand side of this assignment is missing in the
    // listing — restore it from the upstream source.
    int next =;
    // custom – remember last matched position
    int lastEnd = this.end;
    while (advancePP(pp)) {
        if (hasRpts && !advanceRpts(pp)) {
            // pps exhausted
            // NOTE(review): no statement is visible for this branch; presumably
            // the loop is left here — confirm.
        allPositions.set(pp.position + pp.offset);
        if (pp.position > next) {
            // done minimizing current match-length
            if (matchLength <= slop) {
                // custom – match found, remember positions
                addPositions(positions, allPositions, lastEnd, matchLength);
            // Continue with the next entry popped from the queue.
            pp = pq.pop();
            // NOTE(review): right-hand side missing here as well.
            next =;
            matchLength = end - pp.position;
            // custom – remember position of last match
            lastEnd = this.end;
        } else {
            // Still on the same candidate: keep the smaller match length.
            int matchLength2 = end - pp.position;
            if (matchLength2 < matchLength) {
                matchLength = matchLength2;
            // custom – remember position of last match
            lastEnd = this.end;
    // Handle the candidate that was still pending when the loop ended.
    if (matchLength <= slop) {
        // custom – match found, remember positions
        addPositions(positions, allPositions, lastEnd, matchLength);
    // custom begins – if some positions were found then store them
    if (!positions.isEmpty()) {
        documentsToPositionsMap.put(docID(), positions);
    // custom ends
    return numMatches;
Also used : BitIntsHolder(

Example 2 with BitIntsHolder

Use of BitIntsHolder in project OpenGrok by OpenGrok.

the class SuggesterSearcher method getComplexQueryData.

/**
 * Runs {@code query} through {@code search} with a collector that, for the given
 * leaf only, records matching document ids (shifted by the leaf's
 * {@code docBase}) and remembers a {@code PhraseScorer} found on the scorer or
 * among its children.
 * NOTE(review): closing braces are missing from this listing; the nesting of the
 * anonymous classes and catch blocks must be checked against the upstream source.
 *
 * @param query the query to run; {@code null} or a {@code SuggesterQuery} yields
 *              a result whose {@code documentIds} is {@code new BitIntsHolder(0)}
 * @param leafReaderContext the leaf whose hits and scorer are recorded; compared
 *                          to the collector's context by reference
 * @return the populated data, or {@code null} when an {@code IOException} is
 *         caught while the current thread is flagged as interrupted
 */
private ComplexQueryData getComplexQueryData(final Query query, final LeafReaderContext leafReaderContext) {
    ComplexQueryData data = new ComplexQueryData();
    // No search is run for a missing query or a SuggesterQuery.
    if (query == null || query instanceof SuggesterQuery) {
        data.documentIds = new BitIntsHolder(0);
        return data;
    BitIntsHolder documentIds = new BitIntsHolder();
    try {
        search(query, new Collector() {

            public LeafCollector getLeafCollector(final LeafReaderContext context) {
                return new LeafCollector() {

                    // Offset added to each collected doc before it is recorded.
                    final int docBase = context.docBase;

                    public void setScorer(final Scorable scorer) {
                        // Only the requested leaf contributes a scorer.
                        if (leafReaderContext == context) {
                            if (scorer instanceof PhraseScorer) {
                                data.scorer = (PhraseScorer) scorer;
                            } else {
                                try {
                                    // NOTE(review): the start of the following comment appears
                                    // to have been cut off in this listing.
                                    // in #setScorer but no better way was found
                                    for (Scorer.ChildScorable childScorer : scorer.getChildren()) {
                                        if (childScorer.child instanceof PhraseScorer) {
                                            data.scorer = (PhraseScorer) childScorer.child;
                                } catch (Exception e) {
                                // ignore

                    public void collect(int doc) {
                        // Only hits from the requested leaf are recorded.
                        if (leafReaderContext == context) {
                            documentIds.set(docBase + doc);

            public ScoreMode scoreMode() {
                return ScoreMode.COMPLETE_NO_SCORES;
    } catch (IOException e) {
        // An IOException on an interrupted thread is recorded in the
        // interrupted field and reported to the caller as null.
        if (Thread.currentThread().isInterrupted()) {
            interrupted = true;
            return null;
        } else {
            logger.log(Level.WARNING, "Could not get document ids for " + query, e);
    } catch (Exception e) {
        logger.log(Level.WARNING, "Could not get document ids for " + query, e);
    data.documentIds = documentIds;
    return data;
Also used : ScoreMode( BitIntsHolder( Scorable( SuggesterQuery(org.opengrok.suggest.query.SuggesterQuery) IOException( PhraseScorer(org.opengrok.suggest.query.PhraseScorer) IOException( LeafCollector( LeafCollector( Collector( LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 3 with BitIntsHolder

Use of BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomExactPhraseScorer method phraseFreq.

/**
 * Counts exact phrase occurrences by aligning every posting with the lead
 * posting, recording {@code phrasePos + offset} for each occurrence and storing
 * the recorded positions in {@code documentToPositionsMap} under {@code docID()}.
 * NOTE(review): closing braces (and at least one statement) are missing from this
 * listing; verify the loop structure against the upstream source.
 *
 * @return the value of {@code freq}, incremented once per occurrence found
 * @throws IOException declared by the method; presumably raised by the postings
 *                     calls — confirm
 */
private int phraseFreq() throws IOException {
    // reset state
    final PostingsAndPosition[] postings = this.postings;
    for (PostingsAndPosition posting : postings) {
        posting.freq = posting.postings.freq();
        posting.pos = posting.postings.nextPosition();
        // One position has been consumed from this posting so far.
        posting.upTo = 1;
    int freq = 0;
    final PostingsAndPosition lead = postings[0];
    // custom – store positions
    // Created lazily, so it stays null when no occurrence is found.
    BitIntsHolder positions = null;
    advanceHead: while (true) {
        // Phrase start implied by the lead's current position.
        final int phrasePos = lead.pos - lead.offset;
        for (int j = 1; j < postings.length; ++j) {
            final PostingsAndPosition posting = postings[j];
            final int expectedPos = phrasePos + posting.offset;
            // advance up to the same position as the lead
            if (!advancePosition(posting, expectedPos)) {
                break advanceHead;
            if (posting.pos != expectedPos) {
                // we advanced too far
                // Move the lead to the position matching this posting and retry.
                if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
                    continue advanceHead;
                } else {
                    break advanceHead;
        freq += 1;
        // custom begins – found a match
        if (positions == null) {
            positions = new BitIntsHolder();
        // offset is not declared in this method; it is resolved outside it.
        positions.set(phrasePos + offset);
        // NOTE(review): no statement is visible for this condition; presumably
        // the loop ends once the lead has consumed all its positions — confirm.
        if (lead.upTo == lead.freq) {
        lead.pos = lead.postings.nextPosition();
        lead.upTo += 1;
    // custom begin – if some positions were found then store them
    if (positions != null) {
        documentToPositionsMap.put(docID(), positions);
    return freq;
Also used : BitIntsHolder(

Example 4 with BitIntsHolder

Use of BitIntsHolder in project OpenGrok by OpenGrok.

the class CustomSloppyPhraseScorerTest method test.

/**
 * Builds a {@code CustomPhraseQuery} over the field "test", iterates the scorer's
 * two-phase iterator, and asserts on the positions that the scorer reports for
 * the last document that matched.
 * NOTE(review): closing braces are missing from this listing, the reader
 * initialiser is truncated, and no statement adding {@code doc} to the writer is
 * visible; verify against the upstream source.
 *
 * @param slop passed to the {@code CustomPhraseQuery} constructor
 * @param offset not referenced anywhere in the visible body
 * @param terms passed to the {@code CustomPhraseQuery} constructor
 * @param expectedPositions positions passed to {@code contains} in the assertion
 * @throws IOException declared by the method; presumably raised by the index
 *                     calls — confirm
 */
public static void test(final int slop, final int offset, final String[] terms, final Integer[] expectedPositions) throws IOException {
    Directory dir = new ByteBuffersDirectory();
    try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        // The field text is the words "zero" through "ten" in order.
        doc.add(new TextField("test", "zero one two three four five six seven eight nine ten", Field.Store.NO));
    CustomPhraseQuery query = new CustomPhraseQuery(slop, "test", terms);
    // NOTE(review): the expression that opens the reader is missing here.
    try (IndexReader ir = {
        IndexSearcher is = new IndexSearcher(ir);
        Weight w = query.createWeight(is, ScoreMode.COMPLETE_NO_SCORES, 1);
        // Only the first leaf of the reader is scored.
        LeafReaderContext context = ir.getContext().leaves().get(0);
        Scorer scorer = w.scorer(context);
        TwoPhaseIterator it = scorer.twoPhaseIterator();
        // Stays -1 when no document passes it.matches().
        int correctDoc = -1;
        int docId;
        while ((docId = it.approximation().nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (it.matches()) {
                correctDoc = docId;
        BitIntsHolder bs = (BitIntsHolder) ((PhraseScorer) scorer).getPositions(correctDoc);
        assertThat(toSet(bs), contains(expectedPositions));
Also used : IndexSearcher( BitIntsHolder( TwoPhaseIterator( Scorer( PhraseScorer(org.opengrok.suggest.query.PhraseScorer) Document(org.apache.lucene.document.Document) Weight( IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory( IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ByteBuffersDirectory( Directory( IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)


BitIntsHolder ( LeafReaderContext (org.apache.lucene.index.LeafReaderContext)2 PhraseScorer (org.opengrok.suggest.query.PhraseScorer)2 IOException ( Document (org.apache.lucene.document.Document)1 TextField (org.apache.lucene.document.TextField)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 Collector ( IndexSearcher ( LeafCollector ( Scorable ( ScoreMode ( Scorer ( TwoPhaseIterator ( Weight ( ByteBuffersDirectory ( Directory ( SuggesterQuery (org.opengrok.suggest.query.SuggesterQuery)1