Usage of org.apache.lucene.index.TermContext in the lucene-solr project (Apache).
Class MultiTermQueryConstantScoreWrapper, method createWeight:
// Builds a constant-score Weight that, per leaf, either rewrites this
// multi-term query into a small BooleanQuery disjunction (few matching terms)
// or evaluates all matching terms up-front into a doc-id set (many terms).
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    /** Try to collect terms from the given terms enum and return true iff all
     * terms could be collected. If {@code false} is returned, the enum is
     * left positioned on the next term. */
    private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
      // Never collect more terms than could legally fit in a BooleanQuery.
      final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
      for (int i = 0; i < threshold; ++i) {
        final BytesRef term = termsEnum.next();
        if (term == null) {
          // Enum exhausted before hitting the threshold: all terms collected.
          return true;
        }
        // termState() lets us re-seek this term later without a dictionary
        // lookup; deep-copy the bytes because the enum reuses its buffer.
        TermState state = termsEnum.termState();
        terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
      }
      // True only if the enum happened to end exactly at the threshold;
      // otherwise the enum is now positioned on the first uncollected term.
      return termsEnum.next() == null;
    }

    /**
     * On the given leaf context, try to either rewrite to a disjunction if
     * there are few terms, or build a bitset containing matching docs.
     */
    private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
      final Terms terms = context.reader().terms(query.field);
      if (terms == null) {
        // field does not exist
        return new WeightOrDocIdSet((DocIdSet) null);
      }
      final TermsEnum termsEnum = query.getTermsEnum(terms);
      assert termsEnum != null;
      PostingsEnum docs = null; // reused across postings() calls below
      final List<TermAndState> collectedTerms = new ArrayList<>();
      if (collectTerms(context, termsEnum, collectedTerms)) {
        // build a boolean query
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        for (TermAndState t : collectedTerms) {
          // Register the already-known per-segment state so the TermQuery
          // does not have to look the term up again.
          final TermContext termContext = new TermContext(searcher.getTopReaderContext());
          termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
          bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
        }
        Query q = new ConstantScoreQuery(bq.build());
        final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
        return new WeightOrDocIdSet(weight);
      }
      // Too many terms: go back to the terms we already collected and start building the bit set
      DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
      if (collectedTerms.isEmpty() == false) {
        TermsEnum termsEnum2 = terms.iterator();
        for (TermAndState t : collectedTerms) {
          // seekExact with the saved state avoids re-resolving the term.
          termsEnum2.seekExact(t.term, t.state);
          docs = termsEnum2.postings(docs, PostingsEnum.NONE);
          builder.add(docs);
        }
      }
      // Then keep filling the bit set with remaining terms
      // (termsEnum is still positioned on the first uncollected term).
      do {
        docs = termsEnum.postings(docs, PostingsEnum.NONE);
        builder.add(docs);
      } while (termsEnum.next() != null);
      return new WeightOrDocIdSet(builder.build());
    }

    /** Wraps the given doc-id set in a constant-score Scorer, or returns null
     *  when the set (or its iterator) is absent. */
    private Scorer scorer(DocIdSet set) throws IOException {
      if (set == null) {
        return null;
      }
      final DocIdSetIterator disi = set.iterator();
      if (disi == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), disi);
    }

    @Override
    public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
      final WeightOrDocIdSet weightOrBitSet = rewrite(context);
      if (weightOrBitSet.weight != null) {
        // Rewritten to a disjunction: delegate to its bulk scorer.
        return weightOrBitSet.weight.bulkScorer(context);
      } else {
        final Scorer scorer = scorer(weightOrBitSet.set);
        if (scorer == null) {
          return null;
        }
        return new DefaultBulkScorer(scorer);
      }
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final WeightOrDocIdSet weightOrBitSet = rewrite(context);
      if (weightOrBitSet.weight != null) {
        return weightOrBitSet.weight.scorer(context);
      } else {
        return scorer(weightOrBitSet.set);
      }
    }
  };
}
Usage of org.apache.lucene.index.TermContext in the lucene-solr project (Apache).
Class TermInSetQuery, method createWeight:
// Builds a constant-score Weight that, per leaf, matches this query's term
// set against the segment's dictionary and either rewrites to a small
// disjunction or accumulates all matching postings into a doc-id set.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    @Override
    public void extractTerms(Set<Term> terms) {
      // no-op
      // This query is for abuse cases when the number of terms is too high to
      // run efficiently as a BooleanQuery. So likewise we hide its terms in
      // order to protect highlighters
    }

    /**
     * On the given leaf context, try to either rewrite to a disjunction if
     * there are few matching terms, or build a bitset containing matching docs.
     */
    private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
      final LeafReader reader = context.reader();
      final Fields fields = reader.fields();
      Terms terms = fields.terms(field);
      if (terms == null) {
        // Field is absent from this segment: nothing can match.
        return null;
      }
      TermsEnum termsEnum = terms.iterator();
      PostingsEnum docs = null; // reused across postings() calls below
      TermIterator iterator = termData.iterator();
      // We will first try to collect up to 'threshold' terms into 'matchingTerms';
      // if there are too many terms, we will fall back to building the 'builder'
      final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
      assert termData.size() > threshold : "Query should have been rewritten";
      List<TermAndState> matchingTerms = new ArrayList<>(threshold);
      DocIdSetBuilder builder = null;
      for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
        assert field.equals(iterator.field());
        if (termsEnum.seekExact(term)) {
          if (matchingTerms == null) {
            // Already past the threshold: stream postings straight into the builder.
            docs = termsEnum.postings(docs, PostingsEnum.NONE);
            builder.add(docs);
          } else if (matchingTerms.size() < threshold) {
            matchingTerms.add(new TermAndState(field, termsEnum));
          } else {
            // Threshold just exceeded: switch to builder mode and replay the
            // terms collected so far into the doc-id set.
            assert matchingTerms.size() == threshold;
            builder = new DocIdSetBuilder(reader.maxDoc(), terms);
            docs = termsEnum.postings(docs, PostingsEnum.NONE);
            builder.add(docs);
            for (TermAndState t : matchingTerms) {
              t.termsEnum.seekExact(t.term, t.state);
              docs = t.termsEnum.postings(docs, PostingsEnum.NONE);
              builder.add(docs);
            }
            matchingTerms = null; // null marks builder mode from here on
          }
        }
      }
      if (matchingTerms != null) {
        // Few matches: build an equivalent constant-score disjunction.
        assert builder == null;
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        for (TermAndState t : matchingTerms) {
          // Register the known per-segment state to spare the TermQuery a lookup.
          final TermContext termContext = new TermContext(searcher.getTopReaderContext());
          termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
          bq.add(new TermQuery(new Term(t.field, t.term), termContext), Occur.SHOULD);
        }
        Query q = new ConstantScoreQuery(bq.build());
        final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
        return new WeightOrDocIdSet(weight);
      } else {
        assert builder != null;
        return new WeightOrDocIdSet(builder.build());
      }
    }

    /** Wraps the given doc-id set in a constant-score Scorer, or returns null
     *  when the set (or its iterator) is absent. */
    private Scorer scorer(DocIdSet set) throws IOException {
      if (set == null) {
        return null;
      }
      final DocIdSetIterator disi = set.iterator();
      if (disi == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), disi);
    }

    @Override
    public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
      final WeightOrDocIdSet weightOrBitSet = rewrite(context);
      if (weightOrBitSet == null) {
        // Field missing from this segment.
        return null;
      } else if (weightOrBitSet.weight != null) {
        return weightOrBitSet.weight.bulkScorer(context);
      } else {
        final Scorer scorer = scorer(weightOrBitSet.set);
        if (scorer == null) {
          return null;
        }
        return new DefaultBulkScorer(scorer);
      }
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final WeightOrDocIdSet weightOrBitSet = rewrite(context);
      if (weightOrBitSet == null) {
        // Field missing from this segment.
        return null;
      } else if (weightOrBitSet.weight != null) {
        return weightOrBitSet.weight.scorer(context);
      } else {
        return scorer(weightOrBitSet.set);
      }
    }
  };
}
Usage of org.apache.lucene.index.TermContext in the lucene-solr project (Apache).
Class ScoringRewrite, method rewrite:
/**
 * Rewrites {@code query} into a top-level query: every term accepted by the
 * query's terms enum is collected together with its boost and per-segment
 * term state, then added as a clause in sorted term order.
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B topLevel = getTopLevelBuilder();
  final ParallelArraysTermCollector collector = new ParallelArraysTermCollector();
  collectTerms(reader, query, collector);

  final int termCount = collector.terms.size();
  if (termCount > 0) {
    // sort() yields the collected ords in term order; the parallel arrays
    // keep each term's boost and state addressable by its ord.
    final int[] ordsInTermOrder = collector.terms.sort();
    final float[] boosts = collector.array.boost;
    final TermContext[] states = collector.array.termState;
    for (int rank = 0; rank < termCount; rank++) {
      final int ord = ordsInTermOrder[rank];
      final Term term = new Term(query.getField(), collector.terms.get(ord, new BytesRef()));
      // The cached state must agree with the live index statistics.
      assert reader.docFreq(term) == states[ord].docFreq();
      addClause(topLevel, term, states[ord].docFreq(), boosts[ord], states[ord]);
    }
  }
  return build(topLevel);
}
Usage of org.apache.lucene.index.TermContext in the lucene-solr project (Apache).
Class SpanTermQuery, method createWeight:
/**
 * Creates the SpanWeight for this query, reusing the cached term context when
 * it was built for the searcher's current top-level reader context and
 * rebuilding it otherwise. Term statistics are only passed through when
 * scores are needed.
 */
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  final IndexReaderContext readerContext = searcher.getTopReaderContext();
  final boolean cacheIsValid = termContext != null && termContext.wasBuiltFor(readerContext);
  final TermContext states = cacheIsValid ? termContext : TermContext.build(readerContext, term);
  return new SpanTermWeight(states, searcher, needsScores ? Collections.singletonMap(term, states) : null, boost);
}
Usage of org.apache.lucene.index.TermContext in the lucene-solr project (Apache).
Class ShardSearchingTestBase, method getNodeTermStats:
// Mock implementation: a production version would make a remote call to
// fetch term statistics from the node identified by nodeID.
Map<Term, TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
  final NodeState remote = nodes[nodeID];
  final IndexSearcher searcher = remote.searchers.acquire(version);
  if (searcher == null) {
    // The requested point-in-time view is no longer retained on that node.
    throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
  }
  final Map<Term, TermStatistics> result = new HashMap<>();
  try {
    for (Term t : terms) {
      final TermContext ctx = TermContext.build(searcher.getIndexReader().getContext(), t);
      result.put(t, searcher.termStatistics(t, ctx));
    }
  } finally {
    // Always release, even if stats collection throws.
    remote.searchers.release(searcher);
  }
  return result;
}
End of aggregated org.apache.lucene.index.TermContext usage examples.