Example 1 with TermState

use of org.apache.lucene.index.TermState in project elasticsearch by elastic.

the class BlendedTermQuery method adjustDF.

private static TermContext adjustDF(IndexReaderContext readerContext, TermContext ctx, int newDocFreq) {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    TermContext newCtx = new TermContext(readerContext);
    for (int i = 0; i < len; ++i) {
        TermState termState = ctx.get(i);
        if (termState == null) {
        newCtx.register(termState, i, newDocFreq, newTTF);
        newDocFreq = 0;
        newTTF = 0;
    return newCtx;
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) TermContext(org.apache.lucene.index.TermContext)

Example 2 with TermState

use of org.apache.lucene.index.TermState in project lucene-solr by apache.

the class IDVersionSegmentTermsEnum method termState.

public TermState termState() throws IOException {
    assert !eof;
    TermState ts = currentFrame.state.clone();
    //if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
    return ts;
Also used : TermState(org.apache.lucene.index.TermState) BlockTermState(org.apache.lucene.codecs.BlockTermState)

Example 3 with TermState

use of org.apache.lucene.index.TermState in project lucene-solr by apache.

the class BlendedTermQuery method adjustFrequencies.

private static TermContext adjustFrequencies(IndexReaderContext readerContext, TermContext ctx, int artificialDf, long artificialTtf) {
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    TermContext newCtx = new TermContext(readerContext);
    for (int i = 0; i < len; ++i) {
        TermState termState = ctx.get(i);
        if (termState == null) {
        newCtx.register(termState, i);
    newCtx.accumulateStatistics(artificialDf, artificialTtf);
    return newCtx;
Also used : LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) TermContext(org.apache.lucene.index.TermContext)

Example 4 with TermState

use of org.apache.lucene.index.TermState in project lucene-solr by apache.

the class TopTermsRewrite method rewrite.

// Rewrites `query` by collecting its terms (via collectTerms and an anonymous TermCollector)
// into a priority queue of ScoreTerm entries, then adding one clause per queued term to the
// builder obtained from getTopLevelBuilder() and returning build(b).
//
// NOTE(review): as listed here the method has no closing braces, and several steps that its
// own comments announce (cloning the term bytes, adding to the queue, resetting the term
// state, updating maxBoostAtt) have no corresponding statement. Presumably these were lost
// when the snippet was extracted - verify against the upstream Lucene source before reuse.
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
    // Upper bound on retained terms: the smaller of the configured size and getMaxSize().
    final int maxSize = Math.min(size, getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
    collectTerms(reader, query, new TermCollector() {

        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class);

        // Terms seen so far, keyed by their bytes, so a repeated term updates its existing entry.
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<>();

        private TermsEnum termsEnum;

        private BoostAttribute boostAtt;

        // Scratch entry filled in by collect() for the next new term.
        private ScoreTerm st;

        // Called with the enum to read from; stores it and fetches its BoostAttribute.
        public void setNextEnum(TermsEnum termsEnum) {
            this.termsEnum = termsEnum;
            // Passing null clears lastTerm (see compareToLastTerm).
            assert compareToLastTerm(null);
            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);

        // for assert:
        private BytesRefBuilder lastTerm;

        // Assertion helper: always returns true so it can be wrapped in `assert`.
        // NOTE(review): no visible statement stores `t` into lastTerm - confirm against upstream.
        private boolean compareToLastTerm(BytesRef t) {
            if (lastTerm == null && t != null) {
                lastTerm = new BytesRefBuilder();
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert lastTerm.get().compareTo(t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
            return true;

        // Handles one term from the current enum; every visible path returns true.
        public boolean collect(BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();
            // terms in order
            assert compareToLastTerm(bytes);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                // Lower boost than the queue head: skip.
                if (boost < t.boost)
                    return true;
                // Same boost but the term compares greater than the head's term: skip.
                if (boost == t.boost && bytes.compareTo(t.bytes.get()) > 0)
                    return true;
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                // NOTE(review): no copy of `bytes` into st and no queue insertion is visible here.
                st.boost = boost;
                visitedTerms.put(st.bytes.get(), st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    // Reuse the evicted entry as the next scratch ScoreTerm.
                    st = stQueue.poll();
                    // reset the termstate!
                    // NOTE(review): the reset itself is not visible in this listing.
                } else {
                    st = new ScoreTerm(new TermContext(topReaderContext));
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                // NOTE(review): only the peek is visible; no write to maxBoostAtt follows.
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
            return true;
    final B b = getTopLevelBuilder();
    // Snapshot the queue into an array and sort it with scoreTermSortByTermComp.
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes.toBytesRef());
        // We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
        // but truncate such boosts to 0.0f when building the query:
        // add to query
        addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState);
    return build(b);
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) HashMap(java.util.HashMap) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) PriorityQueue(java.util.PriorityQueue) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) TermState(org.apache.lucene.index.TermState) BytesRef(org.apache.lucene.util.BytesRef)

Example 5 with TermState

use of org.apache.lucene.index.TermState in project crate by crate.

the class BlendedTermQuery method adjustDF.

private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    TermStates newCtx = new TermStates(readerContext);
    if (leaves != null) {
        for (int i = 0; i < len; ++i) {
            TermState termState = ctx.get(leaves.get(i));
            if (termState == null) {
            newCtx.register(termState, i, newDocFreq, newTTF);
            newDocFreq = 0;
            newTTF = 0;
    return newCtx;
Also used : TermStates(org.apache.lucene.index.TermStates) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState)


