Search in sources:

Example 6 with TermState

use of org.apache.lucene.index.TermState in project elasticsearch by elastic.

From the class BlendedTermQuery, the method adjustTTF.

/**
 * Builds a new {@link TermContext} whose total term frequency is replaced by
 * {@code sumTTF}, attributing the full docFreq/ttf to the first segment that
 * actually contains the term.
 *
 * @param readerContext the top-level reader context the term context was built for
 * @param termContext   the per-segment term states to copy
 * @param sumTTF        the blended total term frequency to record, or -1 if unknown
 * @return the original context when no adjustment is needed, otherwise a new one
 */
private TermContext adjustTTF(IndexReaderContext readerContext, TermContext termContext, long sumTTF) {
    assert termContext.wasBuiltFor(readerContext);
    // Nothing to adjust when neither side carries a real total term frequency.
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    TermContext newTermContext = new TermContext(readerContext);
    List<LeafReaderContext> leaves = readerContext.leaves();
    int df = termContext.docFreq();
    long ttf = sumTTF;
    // Guard against a null leaves list instead of forcing a single bogus
    // iteration: calling termContext.get(0) on a context with no leaves is
    // unsafe (the crate variant of this method applies the same guard).
    if (leaves != null) {
        for (int i = 0; i < leaves.size(); i++) {
            TermState termState = termContext.get(i);
            if (termState == null) {
                continue;
            }
            // Register the whole df/ttf on the first leaf holding the term;
            // later leaves get 0 so totals are not double-counted.
            newTermContext.register(termState, i, df, ttf);
            df = 0;
            ttf = 0;
        }
    }
    return newTermContext;
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) TermContext(org.apache.lucene.index.TermContext)

Example 7 with TermState

use of org.apache.lucene.index.TermState in project elasticsearch by elastic.

From the class AllTermQuery, the method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    // When scores are not needed, a plain TermQuery weight is sufficient
    // (no payload-based boosting required).
    if (needsScores == false) {
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    // Build the per-segment term states once, up front, so each leaf scorer
    // can seek directly without re-walking the terms dictionary.
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            // Delegate normalization to the similarity's weight.
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                // Advance to the requested doc; only explain if the term
                // actually matches it.
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    // The final score is the similarity score multiplied by the
                    // payload boost; expose both factors in the explanation.
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score, "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:", termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            // Seek using the cached TermState instead of a fresh dictionary lookup.
            termsEnum.seekExact(term.bytes(), state);
            // Payloads are requested because AllTermScorer reads the per-position
            // payload boost.
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }
    };
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Set(java.util.Set) Similarity(org.apache.lucene.search.similarities.Similarity) Explanation(org.apache.lucene.search.Explanation) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Terms(org.apache.lucene.index.Terms) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TermStatistics(org.apache.lucene.search.TermStatistics) TermContext(org.apache.lucene.index.TermContext) Weight(org.apache.lucene.search.Weight) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 8 with TermState

use of org.apache.lucene.index.TermState in project lucene-solr by apache.

From the class SegmentTermsEnum, the method termState.

@Override
public TermState termState() throws IOException {
    assert !eof;
    // The frame's metadata must be decoded before its state reflects the
    // current term; clone so the caller gets an independent snapshot.
    currentFrame.decodeMetaData();
    return currentFrame.state.clone();
}
Also used : TermState(org.apache.lucene.index.TermState) BlockTermState(org.apache.lucene.codecs.BlockTermState)

Example 9 with TermState

use of org.apache.lucene.index.TermState in project lucene-solr by apache.

From the class MultiTermQueryConstantScoreWrapper, the method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // Constant-score weight that, per leaf, either rewrites the multi-term
    // query into a small boolean disjunction (few terms) or materializes a
    // doc-id bitset (many terms).
    return new ConstantScoreWeight(this, boost) {

        /** Try to collect terms from the given terms enum and return true iff all
       *  terms could be collected. If {@code false} is returned, the enum is
       *  left positioned on the next term. */
        private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
            // Cap collection at the boolean rewrite threshold (never above the
            // max allowed boolean clause count).
            final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
            for (int i = 0; i < threshold; ++i) {
                final BytesRef term = termsEnum.next();
                if (term == null) {
                    return true;
                }
                TermState state = termsEnum.termState();
                // deepCopyOf: the enum reuses its BytesRef across next() calls.
                terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
            }
            // One extra next(): true only if the enum is now exhausted.
            return termsEnum.next() == null;
        }

        /**
       * On the given leaf context, try to either rewrite to a disjunction if
       * there are few terms, or build a bitset containing matching docs.
       */
        private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(query.field);
            if (terms == null) {
                // field does not exist
                return new WeightOrDocIdSet((DocIdSet) null);
            }
            final TermsEnum termsEnum = query.getTermsEnum(terms);
            assert termsEnum != null;
            PostingsEnum docs = null;
            final List<TermAndState> collectedTerms = new ArrayList<>();
            if (collectTerms(context, termsEnum, collectedTerms)) {
                // build a boolean query
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                for (TermAndState t : collectedTerms) {
                    // Seed each TermQuery with the already-resolved state for
                    // this leaf so scoring doesn't re-seek the dictionary.
                    final TermContext termContext = new TermContext(searcher.getTopReaderContext());
                    termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
                    bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
                }
                Query q = new ConstantScoreQuery(bq.build());
                final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
                return new WeightOrDocIdSet(weight);
            }
            // Too many terms: go back to the terms we already collected and start building the bit set
            DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
            if (collectedTerms.isEmpty() == false) {
                // A second enum is needed: termsEnum is still positioned on the
                // first uncollected term and must not be disturbed.
                TermsEnum termsEnum2 = terms.iterator();
                for (TermAndState t : collectedTerms) {
                    termsEnum2.seekExact(t.term, t.state);
                    // Reuse the PostingsEnum across terms to reduce allocations.
                    docs = termsEnum2.postings(docs, PostingsEnum.NONE);
                    builder.add(docs);
                }
            }
            // Then keep filling the bit set with remaining terms
            do {
                docs = termsEnum.postings(docs, PostingsEnum.NONE);
                builder.add(docs);
            } while (termsEnum.next() != null);
            return new WeightOrDocIdSet(builder.build());
        }

        private Scorer scorer(DocIdSet set) throws IOException {
            if (set == null) {
                return null;
            }
            final DocIdSetIterator disi = set.iterator();
            if (disi == null) {
                return null;
            }
            return new ConstantScoreScorer(this, score(), disi);
        }

        @Override
        public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                // Rewritten to a boolean query: delegate bulk scoring to it.
                return weightOrBitSet.weight.bulkScorer(context);
            } else {
                final Scorer scorer = scorer(weightOrBitSet.set);
                if (scorer == null) {
                    return null;
                }
                return new DefaultBulkScorer(scorer);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.scorer(context);
            } else {
                return scorer(weightOrBitSet.set);
            }
        }
    };
}
Also used : DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) ArrayList(java.util.ArrayList) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ArrayList(java.util.ArrayList) List(java.util.List) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) TermState(org.apache.lucene.index.TermState) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder)

Example 10 with TermState

use of org.apache.lucene.index.TermState in project crate by crate.

From the class BlendedTermQuery, the method adjustTTF.

/**
 * Rebuilds the given {@link TermStates} so that its recorded total term
 * frequency becomes {@code sumTTF}, crediting the full docFreq/ttf to the
 * first segment that contains the term.
 *
 * @param readerContext the top-level context the states were built for
 * @param termContext   the per-segment term states to copy
 * @param sumTTF        the blended total term frequency, or -1 if unknown
 * @return the input unchanged when nothing needs adjusting, else a new TermStates
 */
private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termContext, long sumTTF) throws IOException {
    assert termContext.wasBuiltFor(readerContext);
    // No real frequency on either side: keep the original states untouched.
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    final TermStates adjusted = new TermStates(readerContext);
    final List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    int remainingDf = termContext.docFreq();
    long remainingTtf = sumTTF;
    if (leaves != null) {
        for (int ord = 0; ord < len; ord++) {
            final TermState state = termContext.get(leaves.get(ord));
            if (state == null) {
                continue;
            }
            // Only the first registering leaf carries the counts; the rest
            // contribute 0 so totals are not double-counted.
            adjusted.register(state, ord, remainingDf, remainingTtf);
            remainingDf = 0;
            remainingTtf = 0;
        }
    }
    return adjusted;
}
Also used : TermStates(org.apache.lucene.index.TermStates) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState)

Aggregations

TermState (org.apache.lucene.index.TermState)10 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 TermContext (org.apache.lucene.index.TermContext)6 TermsEnum (org.apache.lucene.index.TermsEnum)3 BlockTermState (org.apache.lucene.codecs.BlockTermState)2 PostingsEnum (org.apache.lucene.index.PostingsEnum)2 Term (org.apache.lucene.index.Term)2 TermStates (org.apache.lucene.index.TermStates)2 Terms (org.apache.lucene.index.Terms)2 BytesRef (org.apache.lucene.util.BytesRef)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 PriorityQueue (java.util.PriorityQueue)1 Set (java.util.Set)1 CollectionStatistics (org.apache.lucene.search.CollectionStatistics)1 Explanation (org.apache.lucene.search.Explanation)1 TermQuery (org.apache.lucene.search.TermQuery)1 TermStatistics (org.apache.lucene.search.TermStatistics)1