Search in sources:

Example 1 with TermState

Use of org.apache.lucene.index.TermState in the elasticsearch project (by elastic).

From the class BlendedTermQuery, method adjustDF:

/**
 * Builds a copy of {@code ctx} whose document frequency is replaced by
 * {@code newDocFreq}, keeping the total term frequency consistent with it
 * (ttf must be &gt;= df; a ttf of -1 means "not available").
 *
 * @param readerContext the top-level reader context {@code ctx} was built for
 * @param ctx           the term context whose per-leaf states are copied
 * @param newDocFreq    the artificial document frequency to record
 * @return a new {@code TermContext} carrying the adjusted statistics
 */
private static TermContext adjustDF(IndexReaderContext readerContext, TermContext ctx, int newDocFreq) {
    assert ctx.wasBuiltFor(readerContext);
    // Keep ttf consistent with the doc freq (i.e. gte); -1 propagates "unknown".
    final long adjustedTtf = ctx.totalTermFreq() < 0 ? -1 : Math.max(ctx.totalTermFreq(), newDocFreq);
    final List<LeafReaderContext> leaves = readerContext.leaves();
    final int leafCount = (leaves == null) ? 1 : leaves.size();
    final TermContext adjusted = new TermContext(readerContext);
    int remainingDf = newDocFreq;
    long remainingTtf = adjustedTtf;
    for (int ord = 0; ord < leafCount; ord++) {
        final TermState state = ctx.get(ord);
        if (state != null) {
            adjusted.register(state, ord, remainingDf, remainingTtf);
            // Only the first registered leaf carries the statistics; later
            // leaves contribute zero so the totals are not double-counted.
            remainingDf = 0;
            remainingTtf = 0;
        }
    }
    return adjusted;
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) TermContext(org.apache.lucene.index.TermContext)

Example 2 with TermState

Use of org.apache.lucene.index.TermState in the lucene-solr project (by apache).

From the class IDVersionSegmentTermsEnum, method termState:

/**
 * Captures the enum's current term state so a caller can later seek back to
 * this exact term without re-walking the terms dictionary.
 *
 * @return a clone of the current frame's decoded term state
 * @throws IOException if decoding the frame metadata fails
 */
@Override
public TermState termState() throws IOException {
    assert !eof;
    // The frame metadata must be decoded before the state is usable.
    currentFrame.decodeMetaData();
    return currentFrame.state.clone();
}
Also used : TermState(org.apache.lucene.index.TermState) BlockTermState(org.apache.lucene.codecs.BlockTermState)

Example 3 with TermState

Use of org.apache.lucene.index.TermState in the lucene-solr project (by apache).

From the class BlendedTermQuery, method adjustFrequencies:

/**
 * Copies the per-leaf term states of {@code ctx} into a new
 * {@code TermContext} and replaces the accumulated statistics with the
 * supplied artificial values.
 *
 * @param readerContext the top-level reader context to build the copy for
 * @param ctx           the source term context whose states are copied
 * @param artificialDf  the document frequency to report
 * @param artificialTtf the total term frequency to report
 * @return a new {@code TermContext} carrying the artificial statistics
 */
private static TermContext adjustFrequencies(IndexReaderContext readerContext, TermContext ctx, int artificialDf, long artificialTtf) {
    final List<LeafReaderContext> leaves = readerContext.leaves();
    final int leafCount = (leaves == null) ? 1 : leaves.size();
    final TermContext adjusted = new TermContext(readerContext);
    for (int ord = 0; ord < leafCount; ord++) {
        final TermState state = ctx.get(ord);
        if (state != null) {
            // Register the state only; the statistics are applied in one shot below.
            adjusted.register(state, ord);
        }
    }
    adjusted.accumulateStatistics(artificialDf, artificialTtf);
    return adjusted;
}
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) TermContext(org.apache.lucene.index.TermContext)

Example 4 with TermState

Use of org.apache.lucene.index.TermState in the lucene-solr project (by apache).

From the class TopTermsRewrite, method rewrite:

/**
 * Rewrites {@code query} by enumerating its expanded terms per segment,
 * keeping only the top {@code maxSize} scoring terms in a priority queue,
 * and building the final query from those terms. A term seen in multiple
 * segments has its per-segment states merged into one shared TermContext.
 *
 * @param reader the top-level reader whose terms are collected
 * @param query  the multi-term query being rewritten
 * @return the rewritten query containing only the top-scoring terms
 * @throws IOException if term enumeration fails
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
    final int maxSize = Math.min(size, getMaxSize());
    // Head of the queue is the least competitive entry (lowest boost), as the
    // peek-based competitiveness checks below rely on.
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
    collectTerms(reader, query, new TermCollector() {

        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class);

        // Maps term bytes -> the ScoreTerm instance shared with stQueue, so
        // statistics from later segments merge into the already-queued entry.
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<>();

        private TermsEnum termsEnum;

        private BoostAttribute boostAtt;

        // Scratch entry reused across collect() calls until it is queued.
        private ScoreTerm st;

        @Override
        public void setNextEnum(TermsEnum termsEnum) {
            this.termsEnum = termsEnum;
            // New segment: reset the "terms arrive in order" assertion state.
            assert compareToLastTerm(null);
            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRefBuilder lastTerm;

        // Assertion helper: verifies each segment delivers terms in strictly
        // increasing order; always returns true so it can live inside assert.
        private boolean compareToLastTerm(BytesRef t) {
            if (lastTerm == null && t != null) {
                lastTerm = new BytesRefBuilder();
                lastTerm.append(t);
            } else if (t == null) {
                // null marks a segment boundary; restart the ordering check.
                lastTerm = null;
            } else {
                assert lastTerm.get().compareTo(t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();
            // terms in order
            assert compareToLastTerm(bytes);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                // Worse boost than the queue's weakest entry: skip.
                if (boost < t.boost)
                    return true;
                // Equal boost ties break toward the lexicographically smaller term.
                if (boost == t.boost && bytes.compareTo(t.bytes.get()) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes.get(), st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    // Queue overflowed: evict the least competitive entry and
                    // recycle it as the next scratch ScoreTerm.
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes.get());
                    // reset the termstate! 
                    st.termState.clear();
                } else {
                    st = new ScoreTerm(new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes.get());
                }
            }
            return true;
        }
    });
    final B b = getTopLevelBuilder();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    // Sort by term (scoreTermSortByTermComp), not by score, before adding clauses.
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes.toBytesRef());
        // We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
        // but truncate such boosts to 0.0f when building the query:
        // add to query
        addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState);
    }
    return build(b);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) HashMap(java.util.HashMap) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) PriorityQueue(java.util.PriorityQueue) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) TermState(org.apache.lucene.index.TermState) BytesRef(org.apache.lucene.util.BytesRef)

Example 5 with TermState

Use of org.apache.lucene.index.TermState in the crate project (by crate).

From the class BlendedTermQuery, method adjustDF:

/**
 * Creates a copy of {@code ctx} whose document frequency is replaced by
 * {@code newDocFreq}, adjusting the total term frequency so it stays
 * consistent (ttf must be &gt;= df; a ttf of -1 means "not available").
 * If the reader has no leaves, an empty {@code TermStates} is returned.
 *
 * @param readerContext the top-level reader context {@code ctx} was built for
 * @param ctx           the term states whose per-leaf states are copied
 * @param newDocFreq    the artificial document frequency to record
 * @return a new {@code TermStates} carrying the adjusted statistics
 * @throws IOException if reading a per-leaf term state fails
 */
private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte);
    // -1 propagates "unknown".
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    }
    List<LeafReaderContext> leaves = readerContext.leaves();
    TermStates newCtx = new TermStates(readerContext);
    // Fix: the original computed a leaf count of 1 when leaves == null, but
    // that value was unreachable because this loop is guarded by the null
    // check; the dead branch has been removed.
    if (leaves != null) {
        for (int i = 0; i < leaves.size(); ++i) {
            TermState termState = ctx.get(leaves.get(i));
            if (termState == null) {
                continue;
            }
            newCtx.register(termState, i, newDocFreq, newTTF);
            // Only the first registered leaf carries the statistics; later
            // leaves contribute zero so the totals are not double-counted.
            newDocFreq = 0;
            newTTF = 0;
        }
    }
    return newCtx;
}
Also used : TermStates(org.apache.lucene.index.TermStates) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState)

Aggregations

TermState (org.apache.lucene.index.TermState)10 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 TermContext (org.apache.lucene.index.TermContext)6 TermsEnum (org.apache.lucene.index.TermsEnum)3 BlockTermState (org.apache.lucene.codecs.BlockTermState)2 PostingsEnum (org.apache.lucene.index.PostingsEnum)2 Term (org.apache.lucene.index.Term)2 TermStates (org.apache.lucene.index.TermStates)2 Terms (org.apache.lucene.index.Terms)2 BytesRef (org.apache.lucene.util.BytesRef)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 PriorityQueue (java.util.PriorityQueue)1 Set (java.util.Set)1 CollectionStatistics (org.apache.lucene.search.CollectionStatistics)1 Explanation (org.apache.lucene.search.Explanation)1 TermQuery (org.apache.lucene.search.TermQuery)1 TermStatistics (org.apache.lucene.search.TermStatistics)1