Example 6 with CollectionStatistics

Use of org.apache.lucene.search.CollectionStatistics in the project elasticsearch by elastic.

From the class DfsPhase, the execute method:

@Override
public void execute(SearchContext context) {
    // gather every term referenced by the main query and by any rescore queries
    final ObjectHashSet<Term> termsSet = new ObjectHashSet<>();
    try {
        context.searcher().createNormalizedWeight(context.query(), true).extractTerms(new DelegateSet(termsSet));
        for (RescoreSearchContext rescoreContext : context.rescore()) {
            rescoreContext.rescorer().extractTerms(context, rescoreContext, new DelegateSet(termsSet));
        }
        Term[] terms = termsSet.toArray(Term.class);
        // per-term statistics (docFreq, totalTermFreq) across the whole shard
        TermStatistics[] termStatistics = new TermStatistics[terms.length];
        IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext();
        for (int i = 0; i < terms.length; i++) {
            if (context.isCancelled()) {
                throw new TaskCancelledException("cancelled");
            }
            // LUCENE 4 UPGRADE: cache TermContext?
            TermContext termContext = TermContext.build(indexReaderContext, terms[i]);
            termStatistics[i] = context.searcher().termStatistics(terms[i], termContext);
        }
        // one CollectionStatistics entry per distinct field referenced by the terms
        ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
        for (Term term : terms) {
            assert term.field() != null : "field is null";
            if (!fieldStatistics.containsKey(term.field())) {
                final CollectionStatistics collectionStatistics = context.searcher().collectionStatistics(term.field());
                fieldStatistics.put(term.field(), collectionStatistics);
                if (context.isCancelled()) {
                    throw new TaskCancelledException("cancelled");
                }
            }
        }
        context.dfsResult()
                .termsStatistics(terms, termStatistics)
                .fieldStatistics(fieldStatistics)
                .maxDoc(context.searcher().getIndexReader().maxDoc());
    } catch (Exception e) {
        throw new DfsPhaseExecutionException(context, "Exception during dfs phase", e);
    } finally {
        // don't hold on to terms
        termsSet.clear();
    }
}
Also used: RescoreSearchContext(org.elasticsearch.search.rescore.RescoreSearchContext) Term(org.apache.lucene.index.Term) TermStatistics(org.apache.lucene.search.TermStatistics) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) TermContext(org.apache.lucene.index.TermContext) SearchContextException(org.elasticsearch.search.SearchContextException) TaskCancelledException(org.elasticsearch.tasks.TaskCancelledException) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) ObjectHashSet(com.carrotsearch.hppc.ObjectHashSet)
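
To see what these statistics contain, here is a minimal standalone sketch of gathering the same per-term and per-field statistics directly from an IndexSearcher. It assumes the Lucene 6.x API used above; the index path, field name, and term value are hypothetical placeholders.

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.store.FSDirectory;

public class DfsStatsSketch {
    public static void main(String[] args) throws Exception {
        // "/tmp/index", "body" and "lucene" are hypothetical placeholders
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Term term = new Term("body", "lucene");
            // TermContext caches the per-segment term states for the whole reader
            TermContext termContext = TermContext.build(reader.getContext(), term);
            TermStatistics termStats = searcher.termStatistics(term, termContext);
            CollectionStatistics fieldStats = searcher.collectionStatistics(term.field());
            System.out.println("docFreq=" + termStats.docFreq()
                    + ", totalTermFreq=" + termStats.totalTermFreq()
                    + ", field docCount=" + fieldStats.docCount()
                    + ", field sumTotalTermFreq=" + fieldStats.sumTotalTermFreq());
        }
    }
}

In a distributed search these are exactly the numbers the DFS phase ships back to the coordinating node, so that scores are computed against index-wide rather than shard-local frequencies.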

Example 7 with CollectionStatistics

Use of org.apache.lucene.search.CollectionStatistics in the project elasticsearch by elastic.

From the class AllTermQuery, the createWeight method:

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (needsScores == false) {
        // with needsScores == false this is equivalent to a plain TermQuery
        return new TermQuery(term).createWeight(searcher, needsScores);
    }
    // per-segment term states plus index-wide term and field statistics feed the similarity
    final TermContext termStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics collectionStats = searcher.collectionStatistics(term.field());
    final TermStatistics termStats = searcher.termStatistics(term, termStates);
    final Similarity similarity = searcher.getSimilarity(needsScores);
    final SimWeight stats = similarity.computeWeight(collectionStats, termStats);
    return new Weight(this) {

        @Override
        public float getValueForNormalization() throws IOException {
            return stats.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            stats.normalize(norm, topLevelBoost);
        }

        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            AllTermScorer scorer = scorer(context);
            if (scorer != null) {
                int newDoc = scorer.iterator().advance(doc);
                if (newDoc == doc) {
                    float score = scorer.score();
                    float freq = scorer.freq();
                    SimScorer docScorer = similarity.simScorer(stats, context);
                    Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
                    Explanation termScoreExplanation = docScorer.explain(doc, freqExplanation);
                    Explanation payloadBoostExplanation = Explanation.match(scorer.payloadBoost(), "payloadBoost=" + scorer.payloadBoost());
                    return Explanation.match(score, "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], product of:", termScoreExplanation, payloadBoostExplanation);
                }
            }
            return Explanation.noMatch("no matching term");
        }

        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms terms = context.reader().terms(term.field());
            if (terms == null) {
                return null;
            }
            final TermsEnum termsEnum = terms.iterator();
            if (termsEnum == null) {
                return null;
            }
            final TermState state = termStates.get(context.ord);
            if (state == null) {
                // Term does not exist in this segment
                return null;
            }
            termsEnum.seekExact(term.bytes(), state);
            PostingsEnum docs = termsEnum.postings(null, PostingsEnum.PAYLOADS);
            assert docs != null;
            return new AllTermScorer(this, docs, similarity.simScorer(stats, context));
        }
    };
}
Also used: TermQuery(org.apache.lucene.search.TermQuery) Set(java.util.Set) Similarity(org.apache.lucene.search.similarities.Similarity) Explanation(org.apache.lucene.search.Explanation) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Terms(org.apache.lucene.index.Terms) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TermStatistics(org.apache.lucene.search.TermStatistics) TermContext(org.apache.lucene.index.TermContext) Weight(org.apache.lucene.search.Weight) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) PostingsEnum(org.apache.lucene.index.PostingsEnum)
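
The getValueForNormalization/normalize pair implemented by this Weight is driven by the query normalization handshake that existed before Lucene 7. A sketch of roughly what IndexSearcher.createNormalizedWeight does in this Lucene generation, shown here only to make those two methods concrete (not a verbatim copy of the Lucene source):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;

final class NormalizationSketch {
    // Roughly the pre-Lucene-7 normalization handshake, under the
    // assumption of a single top-level query with needsScores == true.
    static Weight normalizedWeight(IndexSearcher searcher, Query query) throws IOException {
        Weight weight = searcher.createWeight(searcher.rewrite(query), true);
        float v = weight.getValueForNormalization();
        float norm = searcher.getSimilarity(true).queryNorm(v);
        if (Float.isInfinite(norm) || Float.isNaN(norm)) {
            norm = 1.0f;
        }
        weight.normalize(norm, 1.0f); // topLevelBoost is 1 at the root of the query tree
        return weight;
    }
}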

Example 8 with CollectionStatistics

Use of org.apache.lucene.search.CollectionStatistics in the project lucene-solr by apache.

From the class NormValueSource, the getValues method:

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), field);
    if (similarity == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }
    // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
    // is 1 when docCount == docFreq == 1
    final SimWeight simWeight = similarity.computeWeight(1f,
            new CollectionStatistics(field, 1, 1, 1, 1),
            new TermStatistics(new BytesRef("bogus"), 1, 1));
    final SimScorer simScorer = similarity.simScorer(simWeight, readerContext);
    return new FloatDocValues(this) {

        int lastDocID = -1;

        @Override
        public float floatVal(int docID) throws IOException {
            if (docID < lastDocID) {
                throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
            }
            lastDocID = docID;
            return simScorer.score(docID, 1f);
        }
    };
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) FloatDocValues(org.apache.lucene.queries.function.docvalues.FloatDocValues) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) TermStatistics(org.apache.lucene.search.TermStatistics) BytesRef(org.apache.lucene.util.BytesRef) CollectionStatistics(org.apache.lucene.search.CollectionStatistics)
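
As a usage sketch, a ValueSource like this is evaluated per segment through getValues. The loop below prints the norm value for every document; the field name "body" is a hypothetical placeholder, and the searcher's similarity must be a TFIDFSimilarity such as ClassicSimilarity or getValues will throw, as the code above shows.

import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.NormValueSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.ClassicSimilarity;

final class NormValuesSketch {
    static void dumpNorms(DirectoryReader reader) throws Exception {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new ClassicSimilarity()); // a TFIDFSimilarity, as required above
        ValueSource vs = new NormValueSource("body");    // "body" is a hypothetical field
        Map context = ValueSource.newContext(searcher);  // stores the searcher under the "searcher" key
        for (LeafReaderContext leaf : reader.leaves()) {
            FunctionValues values = vs.getValues(context, leaf);
            // docIDs must be visited in increasing order, as floatVal above asserts
            for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
                System.out.println((leaf.docBase + doc) + " -> " + values.floatVal(doc));
            }
        }
    }
}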

Aggregations

CollectionStatistics (org.apache.lucene.search.CollectionStatistics): 8
TermStatistics (org.apache.lucene.search.TermStatistics): 6
Term (org.apache.lucene.index.Term): 3
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 2
TermContext (org.apache.lucene.index.TermContext): 2
IndexSearcher (org.apache.lucene.search.IndexSearcher): 2
Similarity (org.apache.lucene.search.similarities.Similarity): 2
SimScorer (org.apache.lucene.search.similarities.Similarity.SimScorer): 2
SimWeight (org.apache.lucene.search.similarities.Similarity.SimWeight): 2
ObjectHashSet (com.carrotsearch.hppc.ObjectHashSet): 1
IOException (java.io.IOException): 1
Set (java.util.Set): 1
FieldInvertState (org.apache.lucene.index.FieldInvertState): 1
IndexReaderContext (org.apache.lucene.index.IndexReaderContext): 1
LeafReader (org.apache.lucene.index.LeafReader): 1
NumericDocValues (org.apache.lucene.index.NumericDocValues): 1
PostingsEnum (org.apache.lucene.index.PostingsEnum): 1
SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 1
TermState (org.apache.lucene.index.TermState): 1
Terms (org.apache.lucene.index.Terms): 1