Example 16 with TermContext

use of org.apache.lucene.index.TermContext in project lucene-solr by apache.

From class MultiTermQueryConstantScoreWrapper, method createWeight:

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        /** Try to collect terms from the given terms enum and return true iff all
         *  terms could be collected. If {@code false} is returned, the enum is
         *  left positioned on the next term. */
        private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
            final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
            for (int i = 0; i < threshold; ++i) {
                final BytesRef term = termsEnum.next();
                if (term == null) {
                    return true;
                }
                TermState state = termsEnum.termState();
                terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
            }
            return termsEnum.next() == null;
        }

        /**
         * On the given leaf context, try to either rewrite to a disjunction if
         * there are few terms, or build a bitset containing matching docs.
         */
        private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(query.field);
            if (terms == null) {
                // field does not exist
                return new WeightOrDocIdSet((DocIdSet) null);
            }
            final TermsEnum termsEnum = query.getTermsEnum(terms);
            assert termsEnum != null;
            PostingsEnum docs = null;
            final List<TermAndState> collectedTerms = new ArrayList<>();
            if (collectTerms(context, termsEnum, collectedTerms)) {
                // build a boolean query
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                for (TermAndState t : collectedTerms) {
                    final TermContext termContext = new TermContext(searcher.getTopReaderContext());
                    termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
                    bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
                }
                Query q = new ConstantScoreQuery(bq.build());
                final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
                return new WeightOrDocIdSet(weight);
            }
            // Too many terms: go back to the terms we already collected and start building the bit set
            DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
            if (collectedTerms.isEmpty() == false) {
                TermsEnum termsEnum2 = terms.iterator();
                for (TermAndState t : collectedTerms) {
                    termsEnum2.seekExact(t.term, t.state);
                    docs = termsEnum2.postings(docs, PostingsEnum.NONE);
                    builder.add(docs);
                }
            }
            // Then keep filling the bit set with remaining terms
            do {
                docs = termsEnum.postings(docs, PostingsEnum.NONE);
                builder.add(docs);
            } while (termsEnum.next() != null);
            return new WeightOrDocIdSet(builder.build());
        }

        private Scorer scorer(DocIdSet set) throws IOException {
            if (set == null) {
                return null;
            }
            final DocIdSetIterator disi = set.iterator();
            if (disi == null) {
                return null;
            }
            return new ConstantScoreScorer(this, score(), disi);
        }

        @Override
        public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.bulkScorer(context);
            } else {
                final Scorer scorer = scorer(weightOrBitSet.set);
                if (scorer == null) {
                    return null;
                }
                return new DefaultBulkScorer(scorer);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.scorer(context);
            } else {
                return scorer(weightOrBitSet.set);
            }
        }
    };
}
Also used : DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) ArrayList(java.util.ArrayList) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) List(java.util.List) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) TermState(org.apache.lucene.index.TermState)
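
The key move above is registering per-leaf TermState on a shared TermContext so the TermQuery built from it never has to re-seek the terms dictionary. Below is a minimal, self-contained sketch of that register-then-query pattern against the Lucene 6.x API; the in-memory index, the "body" field, and the class name are hypothetical.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;

public class TermContextRegisterDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "lucene term context demo", Store.NO));
            w.addDocument(doc);
        }
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Term term = new Term("body", "lucene");
            // Collect the term's state on each leaf and register it, so the
            // TermQuery can skip the terms-dictionary seek at search time.
            TermContext termContext = new TermContext(searcher.getTopReaderContext());
            for (LeafReaderContext leaf : reader.leaves()) {
                Terms terms = leaf.reader().terms(term.field());
                if (terms == null) {
                    // field does not exist on this leaf
                    continue;
                }
                TermsEnum te = terms.iterator();
                if (te.seekExact(term.bytes())) {
                    termContext.register(te.termState(), leaf.ord, te.docFreq(), te.totalTermFreq());
                }
            }
            TopDocs hits = searcher.search(new TermQuery(term, termContext), 10);
            System.out.println("hits=" + hits.totalHits);
        }
    }
}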

Example 17 with TermContext

use of org.apache.lucene.index.TermContext in project lucene-solr by apache.

From class TermInSetQuery, method createWeight:

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public void extractTerms(Set<Term> terms) {
        // no-op
        // This query is for abuse cases when the number of terms is too high to
        // run efficiently as a BooleanQuery. So likewise we hide its terms in
        // order to protect highlighters
        }

        /**
         * On the given leaf context, try to either rewrite to a disjunction if
         * there are few matching terms, or build a bitset containing matching docs.
         */
        private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
            final LeafReader reader = context.reader();
            final Fields fields = reader.fields();
            Terms terms = fields.terms(field);
            if (terms == null) {
                return null;
            }
            TermsEnum termsEnum = terms.iterator();
            PostingsEnum docs = null;
            TermIterator iterator = termData.iterator();
            // We will first try to collect up to 'threshold' terms into 'matchingTerms'
            // if there are too many terms, we will fall back to building the 'builder'
            final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
            assert termData.size() > threshold : "Query should have been rewritten";
            List<TermAndState> matchingTerms = new ArrayList<>(threshold);
            DocIdSetBuilder builder = null;
            for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                assert field.equals(iterator.field());
                if (termsEnum.seekExact(term)) {
                    if (matchingTerms == null) {
                        docs = termsEnum.postings(docs, PostingsEnum.NONE);
                        builder.add(docs);
                    } else if (matchingTerms.size() < threshold) {
                        matchingTerms.add(new TermAndState(field, termsEnum));
                    } else {
                        assert matchingTerms.size() == threshold;
                        builder = new DocIdSetBuilder(reader.maxDoc(), terms);
                        docs = termsEnum.postings(docs, PostingsEnum.NONE);
                        builder.add(docs);
                        for (TermAndState t : matchingTerms) {
                            t.termsEnum.seekExact(t.term, t.state);
                            docs = t.termsEnum.postings(docs, PostingsEnum.NONE);
                            builder.add(docs);
                        }
                        matchingTerms = null;
                    }
                }
            }
            if (matchingTerms != null) {
                assert builder == null;
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                for (TermAndState t : matchingTerms) {
                    final TermContext termContext = new TermContext(searcher.getTopReaderContext());
                    termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
                    bq.add(new TermQuery(new Term(t.field, t.term), termContext), Occur.SHOULD);
                }
                Query q = new ConstantScoreQuery(bq.build());
                final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
                return new WeightOrDocIdSet(weight);
            } else {
                assert builder != null;
                return new WeightOrDocIdSet(builder.build());
            }
        }

        private Scorer scorer(DocIdSet set) throws IOException {
            if (set == null) {
                return null;
            }
            final DocIdSetIterator disi = set.iterator();
            if (disi == null) {
                return null;
            }
            return new ConstantScoreScorer(this, score(), disi);
        }

        @Override
        public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet == null) {
                return null;
            } else if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.bulkScorer(context);
            } else {
                final Scorer scorer = scorer(weightOrBitSet.set);
                if (scorer == null) {
                    return null;
                }
                return new DefaultBulkScorer(scorer);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final WeightOrDocIdSet weightOrBitSet = rewrite(context);
            if (weightOrBitSet == null) {
                return null;
            } else if (weightOrBitSet.weight != null) {
                return weightOrBitSet.weight.scorer(context);
            } else {
                return scorer(weightOrBitSet.set);
            }
        }
    };
}
Also used : SortedSet(java.util.SortedSet) Set(java.util.Set) DocIdSetBuilder(org.apache.lucene.util.DocIdSetBuilder) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) PrefixCodedTerms(org.apache.lucene.index.PrefixCodedTerms) Terms(org.apache.lucene.index.Terms) TermIterator(org.apache.lucene.index.PrefixCodedTerms.TermIterator) Term(org.apache.lucene.index.Term) Fields(org.apache.lucene.index.Fields)
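
The weight above is what backs TermInSetQuery, the public entry point for querying large term sets. A minimal sketch of constructing one; the "id" field and its values are hypothetical.

import java.util.Arrays;

import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;

public class TermInSetDemo {

    // At search time the ConstantScoreWeight above either rewrites this query
    // to a small disjunction or fills a per-leaf doc-id bitset, depending on
    // how many of its terms actually occur in the segment.
    static Query idsQuery() {
        return new TermInSetQuery("id",
                Arrays.asList(new BytesRef("doc-1"), new BytesRef("doc-2"), new BytesRef("doc-9")));
    }
}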

Example 18 with TermContext

use of org.apache.lucene.index.TermContext in project lucene-solr by apache.

From class ScoringRewrite, method rewrite:

@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
    final B builder = getTopLevelBuilder();
    final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
    collectTerms(reader, query, col);
    final int size = col.terms.size();
    if (size > 0) {
        final int[] sort = col.terms.sort();
        final float[] boost = col.array.boost;
        final TermContext[] termStates = col.array.termState;
        for (int i = 0; i < size; i++) {
            final int pos = sort[i];
            final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
            assert reader.docFreq(term) == termStates[pos].docFreq();
            addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
        }
    }
    return build(builder);
}
Also used : Term(org.apache.lucene.index.Term) TermContext(org.apache.lucene.index.TermContext) BytesRef(org.apache.lucene.util.BytesRef)
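
A multi-term query opts into this rewrite through its rewrite method. A minimal sketch that attaches the scoring boolean rewrite to a PrefixQuery; the "title" field and prefix are hypothetical.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoringRewrite;

public class ScoringRewriteDemo {

    // Opt a multi-term query into the scoring rewrite shown above: every term
    // the prefix expands to becomes a scoring SHOULD clause that reuses the
    // TermContext collected during rewriting.
    static PrefixQuery titlePrefix() {
        PrefixQuery pq = new PrefixQuery(new Term("title", "luc"));
        pq.setRewriteMethod(ScoringRewrite.SCORING_BOOLEAN_REWRITE);
        return pq;
    }
}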

Example 19 with TermContext

use of org.apache.lucene.index.TermContext in project lucene-solr by apache.

From class SpanTermQuery, method createWeight:

@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final TermContext context;
    final IndexReaderContext topContext = searcher.getTopReaderContext();
    if (termContext == null || termContext.wasBuiltFor(topContext) == false) {
        context = TermContext.build(topContext, term);
    } else {
        context = termContext;
    }
    return new SpanTermWeight(context, searcher, needsScores ? Collections.singletonMap(term, context) : null, boost);
}
Also used : TermContext(org.apache.lucene.index.TermContext) IndexReaderContext(org.apache.lucene.index.IndexReaderContext)
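
The wasBuiltFor check above lets a caller cache the TermContext across searches on the same reader. A minimal sketch of pre-building one via the expert constructor; the helper and class names are hypothetical.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanTermQuery;

public class CachedSpanTermDemo {

    // Pre-build the TermContext once and hand it to the expert constructor, so
    // the wasBuiltFor check above finds a usable cached context instead of
    // rebuilding it on every createWeight call.
    static SpanTermQuery cachedSpanQuery(IndexSearcher searcher, Term term) throws IOException {
        TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
        return new SpanTermQuery(term, ctx);
    }
}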

Example 20 with TermContext

use of org.apache.lucene.index.TermContext in project lucene-solr by apache.

From class ShardSearchingTestBase, method getNodeTermStats:

// Mock: in a real env, this would hit the wire and get
// term stats from remote node
Map<Term, TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
    final NodeState node = nodes[nodeID];
    final Map<Term, TermStatistics> stats = new HashMap<>();
    final IndexSearcher s = node.searchers.acquire(version);
    if (s == null) {
        throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
    }
    try {
        for (Term term : terms) {
            final TermContext termContext = TermContext.build(s.getIndexReader().getContext(), term);
            stats.put(term, s.termStatistics(term, termContext));
        }
    } finally {
        node.searchers.release(s);
    }
    return stats;
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Term(org.apache.lucene.index.Term) TermContext(org.apache.lucene.index.TermContext)
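
For a single local node, the same statistics come straight from the searcher without the mock's wire hop. A minimal sketch; the helper and class names are hypothetical.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;

public class LocalTermStatsDemo {

    // Local equivalent of the mocked remote call: build the context against
    // this searcher's reader and read the aggregated statistics from it.
    static void printStats(IndexSearcher s, Term term) throws IOException {
        TermContext ctx = TermContext.build(s.getIndexReader().getContext(), term);
        TermStatistics stats = s.termStatistics(term, ctx);
        System.out.println(stats.term().utf8ToString()
                + " docFreq=" + stats.docFreq()
                + " totalTermFreq=" + stats.totalTermFreq());
    }
}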

Aggregations

TermContext (org.apache.lucene.index.TermContext): 21
Term (org.apache.lucene.index.Term): 10
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 8
IndexReaderContext (org.apache.lucene.index.IndexReaderContext): 6
TermState (org.apache.lucene.index.TermState): 6
TermsEnum (org.apache.lucene.index.TermsEnum): 6
BytesRef (org.apache.lucene.util.BytesRef): 6
Terms (org.apache.lucene.index.Terms): 5
TermQuery (org.apache.lucene.search.TermQuery): 5
ArrayList (java.util.ArrayList): 4
HashMap (java.util.HashMap): 4
Query (org.apache.lucene.search.Query): 4
IOException (java.io.IOException): 3
PostingsEnum (org.apache.lucene.index.PostingsEnum): 3
CollectionStatistics (org.apache.lucene.search.CollectionStatistics): 3
TermStatistics (org.apache.lucene.search.TermStatistics): 3
List (java.util.List): 2
Set (java.util.Set): 2
Fields (org.apache.lucene.index.Fields): 2
BooleanQuery (org.apache.lucene.search.BooleanQuery): 2