Use of org.apache.lucene.search.TermStatistics in the elasticsearch project by elastic.
Class AllTermQuery, method createWeight.
/**
 * Builds the {@link Weight} for this query.
 * <p>
 * When scores are not needed, the work is delegated to a plain {@link TermQuery} on the same
 * term. Otherwise the returned weight computes its statistics through the searcher's
 * {@link Similarity} and hands out {@link AllTermScorer} instances that read postings with
 * payloads enabled.
 *
 * @param searcher    searcher supplying the reader context, statistics and similarity
 * @param needsScores whether the caller requires scores
 * @return the weight for this query
 * @throws IOException propagated from the underlying index access
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (!needsScores) {
        // No scoring requested: a plain term query weight is sufficient.
        return new TermQuery(term).createWeight(searcher, needsScores);
    }

    final TermContext perSegmentStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics fieldStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, perSegmentStates);
    final Similarity sim = searcher.getSimilarity(needsScores);
    final SimWeight simWeight = sim.computeWeight(fieldStats, termStats);

    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return simWeight.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            simWeight.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        /**
         * Explains the score of {@code doc}: the similarity's term score explanation combined
         * with the scorer's payload boost, or a no-match explanation when the scorer is
         * absent or does not land on {@code doc}.
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final AllTermScorer scorer = scorer(context);
            // advance() is only reached when a scorer exists for this segment.
            if (scorer == null || scorer.iterator().advance(doc) != doc) {
                return Explanation.noMatch("no matching term");
            }

            final float score = scorer.score();
            final float freq = scorer.freq();
            final SimScorer docScorer = sim.simScorer(simWeight, context);
            final Explanation termScoreExplanation =
                    docScorer.explain(doc, Explanation.match(freq, "termFreq=" + freq));
            final Explanation payloadBoostExplanation =
                    Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
            final String description = "weight(" + getQuery() + " in " + doc + ") ["
                    + sim.getClass().getSimpleName() + "], product of:";
            return Explanation.match(score, description, termScoreExplanation, payloadBoostExplanation);
        }

        /**
         * Creates the scorer for one segment, or returns {@code null} when the field has no
         * terms, no terms enum is available, or the term has no state in this segment.
         */
        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms fieldTerms = context.reader().terms(term.field());
            if (fieldTerms == null) {
                return null;
            }

            final TermsEnum segmentTermsEnum = fieldTerms.iterator();
            if (segmentTermsEnum == null) {
                return null;
            }

            final TermState segmentState = perSegmentStates.get(context.ord);
            if (segmentState == null) {
                // Term does not exist in this segment
                return null;
            }

            // Position the enum on the term via the state captured when the weight was built.
            segmentTermsEnum.seekExact(term.bytes(), segmentState);
            final PostingsEnum postings = segmentTermsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert postings != null;
            return new AllTermScorer(this, postings, sim.simScorer(simWeight, context));
        }
    };
}
Use of org.apache.lucene.search.TermStatistics in the lucene-solr project by apache.
Class ExactStatsCache, method returnLocalStats.
/**
 * Collects local term and collection statistics for the terms of the request's query and adds
 * them to the response.
 * <p>
 * Terms whose local document frequency is zero are skipped. The serialized statistics are only
 * added when both the term map and the collection map are non-empty.
 *
 * @param rb       response builder supplying the query and receiving the statistics
 * @param searcher searcher used to extract terms and compute local statistics
 * @throws SolrException with {@code SERVER_ERROR} when an {@link IOException} occurs
 */
@Override
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
    final Query q = rb.getQuery();
    try {
        final HashSet<Term> terms = new HashSet<>();
        searcher.createNormalizedWeight(q, true).extractTerms(terms);

        final IndexReaderContext topContext = searcher.getTopReaderContext();
        final HashMap<String, TermStats> termStatsByTerm = new HashMap<>();
        final HashMap<String, CollectionStats> colStatsByField = new HashMap<>();

        for (Term t : terms) {
            final TermStatistics localStats = searcher.localTermStatistics(t, TermContext.build(topContext, t));
            // Only terms that are present here contribute anything.
            if (localStats.docFreq() != 0) {
                final String termKey = t.toString();
                final String fieldName = t.field();
                termStatsByTerm.put(termKey, new TermStats(fieldName, localStats));
                rb.rsp.add(TERMS_KEY, termKey);
                // Collection stats are computed once per field.
                if (!colStatsByField.containsKey(fieldName)) {
                    colStatsByField.put(fieldName, new CollectionStats(searcher.localCollectionStatistics(fieldName)));
                }
            }
        }

        // Don't add empty keys
        if (!termStatsByTerm.isEmpty() && !colStatsByField.isEmpty()) {
            final String termStatsString = StatsUtil.termStatsMapToString(termStatsByTerm);
            rb.rsp.add(TERM_STATS_KEY, termStatsString);
            final String colStatsString = StatsUtil.colStatsMapToString(colStatsByField);
            rb.rsp.add(COL_STATS_KEY, colStatsString);
            if (LOG.isDebugEnabled()) {
                LOG.debug("termStats=" + termStatsString
                        + ", collectionStats=" + colStatsString
                        + ", terms=" + terms
                        + ", numDocs=" + searcher.maxDoc());
            }
        }
    } catch (IOException e) {
        LOG.error("Error collecting local stats, query='" + q.toString() + "'", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Error collecting local stats.", e);
    }
}
Use of org.apache.lucene.search.TermStatistics in the lucene-solr project by apache.
Class NormValueSource, method getValues.
/**
 * Returns per-document float values obtained by scoring each document with a
 * {@link TFIDFSimilarity} scorer built from synthetic statistics and a frequency of 1.
 *
 * @param context       value-source context; the entry under {@code "searcher"} is cast to
 *                      {@link IndexSearcher}
 * @param readerContext segment the values are read from
 * @return values whose {@code floatVal} delegates to the similarity scorer
 * @throws IOException                   propagated from creating the similarity scorer
 * @throws UnsupportedOperationException when no {@link TFIDFSimilarity} is available
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), field);
    if (similarity == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }

    // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
    // is 1 when docCount == docFreq == 1
    final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
    final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
    final SimWeight simWeight = similarity.computeWeight(1f, unitCollectionStats, unitTermStats);
    final SimScorer simScorer = similarity.simScorer(simWeight, readerContext);

    return new FloatDocValues(this) {
        // Highest doc id requested so far; requests must not go backwards.
        int lastDocID = -1;

        @Override
        public float floatVal(int docID) throws IOException {
            if (lastDocID > docID) {
                throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
            }
            lastDocID = docID;
            return simScorer.score(docID, 1f);
        }
    };
}
Aggregations