Search in sources :

Example 11 with TermStatistics

Use of org.apache.lucene.search.TermStatistics in the project elasticsearch by elastic.

The method createWeight of the class AllTermQuery.

/**
 * Creates the {@link Weight} used to run this query against {@code searcher}.
 *
 * <p>When scores are not needed the work is delegated to the weight of a plain
 * {@link TermQuery} built on the same term. Otherwise a weight is returned whose
 * scorers are {@code AllTermScorer} instances reading postings with the
 * {@code PostingsEnum.PAYLOADS} flag.
 *
 * @param searcher    searcher supplying the reader context, statistics and similarity
 * @param needsScores whether the caller requires scores
 * @return the weight for this query
 * @throws IOException if reading the index fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (!needsScores) {
        // No scoring requested: a plain TermQuery weight on the same term is used instead.
        return new TermQuery(term).createWeight(searcher, needsScores);
    }

    final TermContext termContext = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics fieldStats = searcher.collectionStatistics(term.field());
    final TermStatistics statsForTerm = searcher.termStatistics(term, termContext);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight simWeight = similarity.computeWeight(fieldStats, statsForTerm);

    return new Weight(this) {

        /** Delegates to the similarity weight computed for the term. */
        @Override
        public float getValueForNormalization() throws IOException {
            return simWeight.getValueForNormalization();
        }

        /** Forwards normalization to the similarity weight computed for the term. */
        @Override
        public void normalize(float norm, float topLevelBoost) {
            simWeight.normalize(norm, topLevelBoost);
        }

        /** Adds the single term of this query to {@code terms}. */
        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        /**
         * Explains the score of {@code doc} as the combination of the similarity's
         * term-score explanation and the scorer's payload boost, or reports no match
         * when the segment has no scorer or the scorer does not land on {@code doc}.
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final AllTermScorer termScorer = scorer(context);
            // advance() is only attempted when a scorer exists for this segment.
            if (termScorer == null || termScorer.iterator().advance(doc) != doc) {
                return Explanation.noMatch("no matching term");
            }

            final float score = termScorer.score();
            final float freq = termScorer.freq();
            final SimScorer simScorer = similarity.simScorer(simWeight, context);
            final Explanation termScore = simScorer.explain(doc, Explanation.match(freq, "termFreq=" + freq));
            final Explanation payloadBoost = Explanation.match(termScorer.payloadBoost(), "payloadBoost=" + termScorer.payloadBoost());
            final String description = "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:";
            return Explanation.match(score, description, termScore, payloadBoost);
        }

        /**
         * Builds the scorer for one segment.
         *
         * @return {@code null} when the field has no terms in the segment, when no
         *         terms enum is available, or when the term has no state for the segment
         */
        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms fieldTerms = context.reader().terms(term.field());
            final TermsEnum cursor = fieldTerms == null ? null : fieldTerms.iterator();
            if (cursor == null) {
                return null;
            }

            final TermState segmentState = termContext.get(context.ord);
            if (segmentState == null) {
                // Term does not exist in this segment
                return null;
            }

            // Position the enum using the state captured when the TermContext was built.
            cursor.seekExact(term.bytes(), segmentState);
            final PostingsEnum postings = cursor.postings(null, PostingsEnum.PAYLOADS);
            assert postings != null;
            return new AllTermScorer(this, postings, similarity.simScorer(simWeight, context));
        }
    };
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Set(java.util.Set) Similarity(org.apache.lucene.search.similarities.Similarity) Explanation(org.apache.lucene.search.Explanation) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Terms(org.apache.lucene.index.Terms) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TermStatistics(org.apache.lucene.search.TermStatistics) TermContext(org.apache.lucene.index.TermContext) Weight(org.apache.lucene.search.Weight) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 12 with TermStatistics

Use of org.apache.lucene.search.TermStatistics in the project lucene-solr by apache.

The method returnLocalStats of the class ExactStatsCache.

/**
 * Collects local term and collection statistics for the terms of the request's
 * query and adds them to the response.
 *
 * <p>Terms whose local document frequency is zero are skipped. The serialized
 * statistics are added only when both the term map and the collection map are
 * non-empty.
 *
 * @param rb       response builder providing the query and receiving the results
 * @param searcher searcher used to extract the terms and look up local statistics
 * @throws SolrException with {@code SERVER_ERROR} if an {@link IOException} occurs
 */
@Override
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
    final Query query = rb.getQuery();
    try {
        final HashSet<Term> queryTerms = new HashSet<>();
        searcher.createNormalizedWeight(query, true).extractTerms(queryTerms);

        final IndexReaderContext topContext = searcher.getTopReaderContext();
        final HashMap<String, TermStats> termStatsByTerm = new HashMap<>();
        final HashMap<String, CollectionStats> colStatsByField = new HashMap<>();

        for (Term term : queryTerms) {
            final TermStatistics localStats = searcher.localTermStatistics(term, TermContext.build(topContext, term));
            // only terms that are present here contribute anything
            if (localStats.docFreq() != 0) {
                termStatsByTerm.put(term.toString(), new TermStats(term.field(), localStats));
                rb.rsp.add(TERMS_KEY, term.toString());
                if (!colStatsByField.containsKey(term.field())) {
                    // collection stats for this field, looked up once per field
                    colStatsByField.put(term.field(), new CollectionStats(searcher.localCollectionStatistics(term.field())));
                }
            }
        }

        if (termStatsByTerm.isEmpty() || colStatsByField.isEmpty()) {
            // Don't add empty keys
            return;
        }

        final String serializedTermStats = StatsUtil.termStatsMapToString(termStatsByTerm);
        rb.rsp.add(TERM_STATS_KEY, serializedTermStats);
        final String serializedColStats = StatsUtil.colStatsMapToString(colStatsByField);
        rb.rsp.add(COL_STATS_KEY, serializedColStats);
        if (LOG.isDebugEnabled()) {
            LOG.debug("termStats=" + serializedTermStats + ", collectionStats=" + serializedColStats + ", terms=" + queryTerms + ", numDocs=" + searcher.maxDoc());
        }
    } catch (IOException e) {
        LOG.error("Error collecting local stats, query='" + query.toString() + "'", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Error collecting local stats.", e);
    }
}
Also used : Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) TermStatistics(org.apache.lucene.search.TermStatistics) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) TermContext(org.apache.lucene.index.TermContext) SolrException(org.apache.solr.common.SolrException) HashSet(java.util.HashSet)

Example 13 with TermStatistics

Use of org.apache.lucene.search.TermStatistics in the project lucene-solr by apache.

The method getValues of the class NormValueSource.

/**
 * Returns per-document float values produced by scoring each document with a
 * term frequency of 1 against a weight built from synthetic statistics.
 *
 * @param context       map expected to hold an {@link IndexSearcher} under the key {@code "searcher"}
 * @param readerContext segment for which the values are produced
 * @return values whose {@code floatVal} must be called with non-decreasing doc IDs
 * @throws UnsupportedOperationException if the searcher's similarity cannot be used as a {@code TFIDFSimilarity}
 * @throws IOException if creating the similarity scorer fails
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(true), field);
    if (tfidf == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }

    // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
    // is 1 when docCount == docFreq == 1
    final SimWeight unitWeight = tfidf.computeWeight(
        1f,
        new CollectionStatistics(field, 1, 1, 1, 1),
        new TermStatistics(new BytesRef("bogus"), 1, 1));
    final SimScorer normScorer = tfidf.simScorer(unitWeight, readerContext);

    return new FloatDocValues(this) {

        // Highest doc ID requested so far; used to reject out-of-order access.
        private int previousDocId = -1;

        @Override
        public float floatVal(int docID) throws IOException {
            if (docID >= previousDocId) {
                previousDocId = docID;
                return normScorer.score(docID, 1f);
            }
            throw new AssertionError("docs out of order: lastDocID=" + previousDocId + " docID=" + docID);
        }
    };
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) FloatDocValues(org.apache.lucene.queries.function.docvalues.FloatDocValues) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) TermStatistics(org.apache.lucene.search.TermStatistics) BytesRef(org.apache.lucene.util.BytesRef) CollectionStatistics(org.apache.lucene.search.CollectionStatistics)

Aggregations

TermStatistics (org.apache.lucene.search.TermStatistics)13 Term (org.apache.lucene.index.Term)7 CollectionStatistics (org.apache.lucene.search.CollectionStatistics)6 TermContext (org.apache.lucene.index.TermContext)3 Explanation (org.apache.lucene.search.Explanation)3 BytesRef (org.apache.lucene.util.BytesRef)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 IndexReaderContext (org.apache.lucene.index.IndexReaderContext)2 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)2 PostingsEnum (org.apache.lucene.index.PostingsEnum)2 Terms (org.apache.lucene.index.Terms)2 TermsEnum (org.apache.lucene.index.TermsEnum)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 Similarity (org.apache.lucene.search.similarities.Similarity)2 SimScorer (org.apache.lucene.search.similarities.Similarity.SimScorer)2 SimWeight (org.apache.lucene.search.similarities.Similarity.SimWeight)2 ObjectHashSet (com.carrotsearch.hppc.ObjectHashSet)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1