use of org.apache.lucene.index.TermState in project elasticsearch by elastic.
the class BlendedTermQuery method adjustTTF.
private TermContext adjustTTF(IndexReaderContext readerContext, TermContext termContext, long sumTTF) {
assert termContext.wasBuiltFor(readerContext);
if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
return termContext;
}
TermContext newTermContext = new TermContext(readerContext);
List<LeafReaderContext> leaves = readerContext.leaves();
final int len;
if (leaves == null) {
len = 1;
} else {
len = leaves.size();
}
int df = termContext.docFreq();
long ttf = sumTTF;
for (int i = 0; i < len; i++) {
TermState termState = termContext.get(i);
if (termState == null) {
continue;
}
newTermContext.register(termState, i, df, ttf);
df = 0;
ttf = 0;
}
return newTermContext;
}
use of org.apache.lucene.index.TermState in project elasticsearch by elastic.
the class AllTermQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
if (needsScores == false) {
return new TermQuery(term).createWeight(searcher, needsScores);
}
final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
final TermStatistics termStats = searcher.termStatistics(term, termStates);
final Similarity similarity = searcher.getSimilarity(needsScores);
final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
return new Weight(this) {
@Override
public float getValueForNormalization() throws IOException {
return stats.getValueForNormalization();
}
@Override
public void normalize(float norm, float topLevelBoost) {
stats.normalize(norm, topLevelBoost);
}
@Override
public void extractTerms(Set<Term> terms) {
terms.add(term);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
AllTermScorer scorer = scorer(context);
if (scorer != null) {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float score = scorer.score();
float freq = scorer.freq();
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
return Explanation.match(score, "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:", termScoreExplanation, payloadBoostExplanation);
}
}
return Explanation.noMatch("no matching term");
}
@Override
public AllTermScorer scorer(LeafReaderContext context) throws IOException {
final Terms terms = context.reader().terms(term.field());
if (terms == null) {
return null;
}
final TermsEnum termsEnum = terms.iterator();
if (termsEnum == null) {
return null;
}
final TermState state = termStates.get(context.ord);
if (state == null) {
// Term does not exist in this segment
return null;
}
termsEnum.seekExact(term.bytes(), state);
PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assert docs != null;
return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
}
};
}
use of org.apache.lucene.index.TermState in project lucene-solr by apache.
the class SegmentTermsEnum method termState.
@Override
public TermState termState() throws IOException {
assert !eof;
currentFrame.decodeMetaData();
TermState ts = currentFrame.state.clone();
//if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
return ts;
}
use of org.apache.lucene.index.TermState in project lucene-solr by apache.
the class MultiTermQueryConstantScoreWrapper method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
/** Try to collect terms from the given terms enum and return true iff all
* terms could be collected. If {@code false} is returned, the enum is
* left positioned on the next term. */
private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
for (int i = 0; i < threshold; ++i) {
final BytesRef term = termsEnum.next();
if (term == null) {
return true;
}
TermState state = termsEnum.termState();
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
}
return termsEnum.next() == null;
}
/**
* On the given leaf context, try to either rewrite to a disjunction if
* there are few terms, or build a bitset containing matching docs.
*/
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
final Terms terms = context.reader().terms(query.field);
if (terms == null) {
// field does not exist
return new WeightOrDocIdSet((DocIdSet) null);
}
final TermsEnum termsEnum = query.getTermsEnum(terms);
assert termsEnum != null;
PostingsEnum docs = null;
final List<TermAndState> collectedTerms = new ArrayList<>();
if (collectTerms(context, termsEnum, collectedTerms)) {
// build a boolean query
BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (TermAndState t : collectedTerms) {
final TermContext termContext = new TermContext(searcher.getTopReaderContext());
termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
}
Query q = new ConstantScoreQuery(bq.build());
final Weight weight = searcher.rewrite(q).createWeight(searcher, needsScores, score());
return new WeightOrDocIdSet(weight);
}
// Too many terms: go back to the terms we already collected and start building the bit set
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
builder.add(docs);
}
}
// Then keep filling the bit set with remaining terms
do {
docs = termsEnum.postings(docs, PostingsEnum.NONE);
builder.add(docs);
} while (termsEnum.next() != null);
return new WeightOrDocIdSet(builder.build());
}
private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
return new ConstantScoreScorer(this, score(), disi);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context);
} else {
final Scorer scorer = scorer(weightOrBitSet.set);
if (scorer == null) {
return null;
}
return new DefaultBulkScorer(scorer);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context);
} else {
return scorer(weightOrBitSet.set);
}
}
};
}
use of org.apache.lucene.index.TermState in project crate by crate.
the class BlendedTermQuery method adjustTTF.
private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termContext, long sumTTF) throws IOException {
assert termContext.wasBuiltFor(readerContext);
if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
return termContext;
}
TermStates newTermContext = new TermStates(readerContext);
List<LeafReaderContext> leaves = readerContext.leaves();
final int len;
if (leaves == null) {
len = 1;
} else {
len = leaves.size();
}
int df = termContext.docFreq();
long ttf = sumTTF;
if (leaves != null) {
for (int i = 0; i < len; i++) {
TermState termState = termContext.get(leaves.get(i));
if (termState == null) {
continue;
}
newTermContext.register(termState, i, df, ttf);
df = 0;
ttf = 0;
}
}
return newTermContext;
}
Aggregations