Use of org.apache.lucene.index.TermState in project elasticsearch by elastic.
From the class BlendedTermQuery, method adjustDF:
private static TermContext adjustDF(IndexReaderContext readerContext, TermContext ctx, int newDocFreq) {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    }
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    TermContext newCtx = new TermContext(readerContext);
    for (int i = 0; i < len; ++i) {
        TermState termState = ctx.get(i);
        if (termState == null) {
            continue;
        }
        newCtx.register(termState, i, newDocFreq, newTTF);
        newDocFreq = 0;
        newTTF = 0;
    }
    return newCtx;
}
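For context, a hedged sketch of how a helper like adjustDF might be driven. The blendDocFreqs wrapper and its variables below are assumptions for illustration, not part of the elasticsearch source; they just show adjusting every blended term to a shared maximum document frequency.

private static TermContext[] blendDocFreqs(IndexReaderContext readerContext, TermContext[] contexts) {
    // find the highest per-term document frequency across the blended terms
    int maxDf = 0;
    for (TermContext ctx : contexts) {
        maxDf = Math.max(maxDf, ctx.docFreq());
    }
    // rewrite each TermContext so all terms report the same (maximum) doc freq
    TermContext[] adjusted = new TermContext[contexts.length];
    for (int i = 0; i < contexts.length; i++) {
        adjusted[i] = adjustDF(readerContext, contexts[i], maxDf);
    }
    return adjusted;
}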
Use of org.apache.lucene.index.TermState in project lucene-solr by apache.
From the class IDVersionSegmentTermsEnum, method termState:
@Override
public TermState termState() throws IOException {
    assert !eof;
    currentFrame.decodeMetaData();
    TermState ts = currentFrame.state.clone();
    //if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
    return ts;
}
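A hedged sketch, not taken from the lucene-solr source, of the usual round-trip for a captured TermState: it shows why termState() clones the current frame state, since the clone can later restore another TermsEnum to the same position without re-walking the term dictionary. The terms and target parameters are placeholders.

static void reuseTermState(Terms terms, BytesRef target) throws IOException {
    TermsEnum first = terms.iterator();
    if (first.seekExact(target)) {
        TermState state = first.termState();  // capture the current position (a cloned frame state)
        TermsEnum second = terms.iterator();
        second.seekExact(target, state);      // restore cheaply, no full dictionary lookup
        // postings can now be pulled from 'second' as if it had performed the full seek
    }
}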
Use of org.apache.lucene.index.TermState in project lucene-solr by apache.
From the class BlendedTermQuery, method adjustFrequencies:
private static TermContext adjustFrequencies(IndexReaderContext readerContext, TermContext ctx, int artificialDf, long artificialTtf) {
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    TermContext newCtx = new TermContext(readerContext);
    for (int i = 0; i < len; ++i) {
        TermState termState = ctx.get(i);
        if (termState == null) {
            continue;
        }
        newCtx.register(termState, i);
    }
    newCtx.accumulateStatistics(artificialDf, artificialTtf);
    return newCtx;
}
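A hedged sketch of the kind of caller that might feed adjustFrequencies. The blend wrapper and the max-based statistics below are illustrative assumptions, not the actual lucene-solr BlendedTermQuery logic; they only show artificial df/ttf values being computed once and then applied to every term's context.

private static TermContext[] blend(IndexReaderContext readerContext, TermContext[] contexts) {
    int df = 0;
    long ttf = 0;
    for (TermContext ctx : contexts) {
        df = Math.max(df, ctx.docFreq());          // largest doc freq among the terms
        ttf = Math.max(ttf, ctx.totalTermFreq());  // largest total term freq among the terms
    }
    TermContext[] blended = new TermContext[contexts.length];
    for (int i = 0; i < contexts.length; i++) {
        blended[i] = adjustFrequencies(readerContext, contexts[i], df, ttf);
    }
    return blended;
}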
Use of org.apache.lucene.index.TermState in project lucene-solr by apache.
From the class TopTermsRewrite, method rewrite:
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
    final int maxSize = Math.min(size, getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
    collectTerms(reader, query, new TermCollector() {
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class);
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<>();
        private TermsEnum termsEnum;
        private BoostAttribute boostAtt;
        private ScoreTerm st;

        @Override
        public void setNextEnum(TermsEnum termsEnum) {
            this.termsEnum = termsEnum;
            assert compareToLastTerm(null);
            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRefBuilder lastTerm;

        private boolean compareToLastTerm(BytesRef t) {
            if (lastTerm == null && t != null) {
                lastTerm = new BytesRefBuilder();
                lastTerm.append(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert lastTerm.get().compareTo(t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();
            // terms in order
            assert compareToLastTerm(bytes);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && bytes.compareTo(t.bytes.get()) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes.get(), st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes.get());
                    // reset the termstate!
                    st.termState.clear();
                } else {
                    st = new ScoreTerm(new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes.get());
                }
            }
            return true;
        }
    });
    final B b = getTopLevelBuilder();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes.toBytesRef());
        // We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
        // but truncate such boosts to 0.0f when building the query:
        // add to query
        addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState);
    }
    return build(b);
}
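A hedged usage sketch, not part of the snippet above: a top-N rewrite like this is what a MultiTermQuery executes when a top-terms rewrite method is configured on it (Lucene 7/8 style API). The field name, query text, and queue size are placeholders, and reader is an assumed open IndexReader.

FuzzyQuery fuzzy = new FuzzyQuery(new Term("title", "lucene"));
// keep only the 50 highest-boosted expanded terms; TopTermsRewrite builds the final query from them
fuzzy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
Query rewritten = fuzzy.rewrite(reader);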
Use of org.apache.lucene.index.TermState in project crate by crate.
From the class BlendedTermQuery, method adjustDF:
private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    }
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    TermStates newCtx = new TermStates(readerContext);
    if (leaves != null) {
        for (int i = 0; i < len; ++i) {
            TermState termState = ctx.get(leaves.get(i));
            if (termState == null) {
                continue;
            }
            newCtx.register(termState, i, newDocFreq, newTTF);
            newDocFreq = 0;
            newTTF = 0;
        }
    }
    return newCtx;
}
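A hedged sketch of how the TermStates passed into this crate variant is typically produced, assuming the Lucene 8 era TermStates.build(IndexReaderContext, Term, boolean) signature that this project compiles against. The field, value, and minDocFreq names are placeholders.

TermStates ctx = TermStates.build(readerContext, new Term("field", "value"), true); // true = also load df/ttf stats
TermStates adjusted = adjustDF(readerContext, ctx, Math.max(ctx.docFreq(), minDocFreq));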