Search in sources :

Example 1 with SimWeight

use of org.apache.lucene.search.similarities.Similarity.SimWeight in project lucene-solr by apache.

In the class TestSimilarityBase, the method testLengthEncodingBackwardCompatibility:

/**
 * Checks that small field lengths are reported exactly by the similarity's scorer, both for an
 * index whose created-version major is that of {@code Version.LUCENE_6_0_0} and for one whose
 * created-version major is that of {@code Version.LATEST}.
 *
 * <p>For every version/length combination a fresh directory is created, a single document whose
 * "foo" field contains {@code length} occurrences of the token "b" is indexed, and the value
 * returned by {@code getLengthValue(0)} is compared (after truncation to int) with {@code length}.
 *
 * @throws IOException if writing to or reading from the index fails
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = RandomPicks.randomFrom(random(), sims);
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major }) {
        for (int length : new int[] { 1, 2, 4 }) {
            // these length values are encoded accurately in both cases
            // try-with-resources guarantees that the directory, writer and reader are released
            // even when the assertion below fails or an exception is thrown mid-iteration.
            try (Directory dir = newDirectory()) {
                // set the version on the directory
                new SegmentInfos(indexCreatedVersionMajor).commit(dir);
                try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity))) {
                    Document doc = new Document();
                    // "b b ... b" with exactly `length` tokens
                    String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
                    doc.add(new TextField("foo", value, Store.NO));
                    w.addDocument(doc);
                    // reader is opened from the writer, so it is closed before the writer
                    try (IndexReader reader = DirectoryReader.open(w)) {
                        IndexSearcher searcher = newSearcher(reader);
                        searcher.setSimilarity(similarity);
                        Term term = new Term("foo", "b");
                        TermContext context = TermContext.build(reader.getContext(), term);
                        SimWeight simWeight = similarity.computeWeight(1f, searcher.collectionStatistics("foo"), searcher.termStatistics(term, context));
                        SimilarityBase.BasicSimScorer simScorer = (SimilarityBase.BasicSimScorer) similarity.simScorer(simWeight, reader.leaves().get(0));
                        float docLength = simScorer.getLengthValue(0);
                        assertEquals(length, (int) docLength);
                    }
                }
            }
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Query(org.apache.lucene.search.Query) RandomPicks(com.carrotsearch.randomizedtesting.generators.RandomPicks) FieldType(org.apache.lucene.document.FieldType) Term(org.apache.lucene.index.Term) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) TermStatistics(org.apache.lucene.search.TermStatistics) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) BytesRef(org.apache.lucene.util.BytesRef) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) TermContext(org.apache.lucene.index.TermContext) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) List(java.util.List) FieldInvertState(org.apache.lucene.index.FieldInvertState) IndexWriter(org.apache.lucene.index.IndexWriter) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermQuery(org.apache.lucene.search.TermQuery) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) SegmentInfos(org.apache.lucene.index.SegmentInfos) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TermContext(org.apache.lucene.index.TermContext) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) 
IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 2 with SimWeight

use of org.apache.lucene.search.similarities.Similarity.SimWeight in project elasticsearch by elastic.

In the class AllTermQuery, the method createWeight:

/**
 * Creates the {@link Weight} for this query.
 *
 * <p>When scores are not needed the work is delegated to a plain {@link TermQuery} on the same
 * term. Otherwise the returned weight combines the similarity's per-term score with the value
 * returned by {@code AllTermScorer#payloadBoost()}.
 *
 * @param searcher    searcher supplying the reader context, statistics and similarity
 * @param needsScores whether the caller requires scores
 * @return the weight to use for this query
 * @throws IOException if reading term or collection statistics fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    if (!needsScores) {
        // No scoring requested: a regular term query weight is sufficient.
        final TermQuery plainTermQuery = new TermQuery(term);
        return plainTermQuery.createWeight(searcher, needsScores);
    }

    final TermContext perSegmentStates = TermContext.build(searcher.getTopReaderContext(), term);
    final CollectionStatistics fieldStatistics = searcher.collectionStatistics(term.field());
    final TermStatistics termStatistics = searcher.termStatistics(term, perSegmentStates);
    final Similarity sim = searcher.getSimilarity(needsScores);
    final SimWeight simWeight = sim.computeWeight(fieldStatistics, termStatistics);

    return new Weight(this) {

        /** Delegates to the similarity weight computed for the term. */
        @Override
        public float getValueForNormalization() throws IOException {
            return simWeight.getValueForNormalization();
        }

        /** Forwards the normalization factors to the similarity weight. */
        @Override
        public void normalize(float norm, float topLevelBoost) {
            simWeight.normalize(norm, topLevelBoost);
        }

        /** Adds the single term of this query to {@code terms}. */
        @Override
        public void extractTerms(Set<Term> terms) {
            terms.add(term);
        }

        /**
         * Explains the score of {@code doc}, or reports no match when the segment has no scorer
         * for the term or the scorer cannot be positioned exactly on {@code doc}.
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final AllTermScorer termScorer = scorer(context);
            if (termScorer == null || termScorer.iterator().advance(doc) != doc) {
                return Explanation.noMatch("no matching term");
            }

            final float score = termScorer.score();
            final float freq = termScorer.freq();
            final SimScorer docScorer = sim.simScorer(simWeight, context);
            final Explanation termScoreExplanation =
                    docScorer.explain(doc, Explanation.match(freq, "termFreq=" + freq));
            final Explanation payloadBoostExplanation =
                    Explanation.match(termScorer.payloadBoost(), "payloadBoost=" + termScorer.payloadBoost());
            final String description = "weight(" + getQuery() + " in " + doc + ") ["
                    + sim.getClass().getSimpleName() + "], product of:";
            return Explanation.match(score, description, termScoreExplanation, payloadBoostExplanation);
        }

        /**
         * Returns a scorer over the term's postings (requested with payloads) in the given
         * segment, or {@code null} when the field or the term is absent from it.
         */
        @Override
        public AllTermScorer scorer(LeafReaderContext context) throws IOException {
            final Terms fieldTerms = context.reader().terms(term.field());
            if (fieldTerms == null) {
                return null;
            }
            final TermsEnum termIterator = fieldTerms.iterator();
            if (termIterator == null) {
                return null;
            }
            final TermState segmentState = perSegmentStates.get(context.ord);
            if (segmentState == null) {
                // Term does not exist in this segment
                return null;
            }
            termIterator.seekExact(term.bytes(), segmentState);
            final PostingsEnum postings = termIterator.postings(null, PostingsEnum.PAYLOADS);
            assert postings != null;
            return new AllTermScorer(this, postings, sim.simScorer(simWeight, context));
        }
    };
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Set(java.util.Set) Similarity(org.apache.lucene.search.similarities.Similarity) Explanation(org.apache.lucene.search.Explanation) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Terms(org.apache.lucene.index.Terms) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TermStatistics(org.apache.lucene.search.TermStatistics) TermContext(org.apache.lucene.index.TermContext) Weight(org.apache.lucene.search.Weight) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermState(org.apache.lucene.index.TermState) PostingsEnum(org.apache.lucene.index.PostingsEnum)

Example 3 with SimWeight

use of org.apache.lucene.search.similarities.Similarity.SimWeight in project lucene-solr by apache.

In the class NormValueSource, the method getValues:

/**
 * Returns per-document float values obtained by scoring each document with a
 * {@link TFIDFSimilarity} scorer at a term frequency of {@code 1f}, using collection and term
 * statistics that are all set to 1.
 *
 * @param context       function context; the entry under the key "searcher" is cast to
 *                      {@link IndexSearcher}
 * @param readerContext the segment to produce values for
 * @return values whose {@code floatVal} must be called with non-decreasing doc IDs
 * @throws IOException if creating the scorer fails
 * @throws UnsupportedOperationException if the searcher's similarity cannot be used as a
 *         {@link TFIDFSimilarity} for the field
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(true), field);
    if (tfidf == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }

    // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
    // is 1 when docCount == docFreq == 1
    final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
    final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
    final SimWeight unitWeight = tfidf.computeWeight(1f, unitCollectionStats, unitTermStats);
    final SimScorer normScorer = tfidf.simScorer(unitWeight, readerContext);

    return new FloatDocValues(this) {

        // Highest doc ID seen so far; used to reject out-of-order access.
        private int previousDocId = -1;

        @Override
        public float floatVal(int docID) throws IOException {
            if (previousDocId > docID) {
                throw new AssertionError("docs out of order: lastDocID=" + previousDocId + " docID=" + docID);
            }
            previousDocId = docID;
            return normScorer.score(docID, 1f);
        }
    };
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) FloatDocValues(org.apache.lucene.queries.function.docvalues.FloatDocValues) SimScorer(org.apache.lucene.search.similarities.Similarity.SimScorer) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) TermStatistics(org.apache.lucene.search.TermStatistics) BytesRef(org.apache.lucene.util.BytesRef) CollectionStatistics(org.apache.lucene.search.CollectionStatistics)

Aggregations

CollectionStatistics (org.apache.lucene.search.CollectionStatistics): 3, TermStatistics (org.apache.lucene.search.TermStatistics): 3, SimWeight (org.apache.lucene.search.similarities.Similarity.SimWeight): 3, TermContext (org.apache.lucene.index.TermContext): 2, Explanation (org.apache.lucene.search.Explanation): 2, IndexSearcher (org.apache.lucene.search.IndexSearcher): 2, TermQuery (org.apache.lucene.search.TermQuery): 2, SimScorer (org.apache.lucene.search.similarities.Similarity.SimScorer): 2, BytesRef (org.apache.lucene.util.BytesRef): 2, RandomPicks (com.carrotsearch.randomizedtesting.generators.RandomPicks): 1, IOException (java.io.IOException): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, Set (java.util.Set): 1, Collectors (java.util.stream.Collectors): 1, IntStream (java.util.stream.IntStream): 1, Document (org.apache.lucene.document.Document): 1, Field (org.apache.lucene.document.Field): 1, Store (org.apache.lucene.document.Field.Store): 1, FieldType (org.apache.lucene.document.FieldType): 1