Search in sources :

Example 11 with Similarity

use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.

the class TestDFISimilarityFactory method testParameters.

/**
 * Checks the similarity configured for the "text_params" field type: it must be
 * exactly a {@link DFISimilarity}, and its discountOverlaps setting must be false.
 */
public void testParameters() throws Exception {
    final Similarity similarity = getSimilarity("text_params");
    // Exact class match, not just an instanceof relationship.
    assertEquals(DFISimilarity.class, similarity.getClass());
    final DFISimilarity dfi = (DFISimilarity) similarity;
    assertFalse(dfi.getDiscountOverlaps());
}
Also used : DFISimilarity(org.apache.lucene.search.similarities.DFISimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) DFISimilarity(org.apache.lucene.search.similarities.DFISimilarity)

Example 12 with Similarity

use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.

the class SweetSpotSimilarityTest method testSweetSpotComputeNorm.

/**
 * Compares the "fieldNorm" values reported for a {@link SweetSpotSimilarity} against
 * those of a {@link ClassicSimilarity}, first for a single similarity and then for a
 * {@link PerFieldSimilarityWrapper} that hands out a different similarity per field.
 *
 * Every norm is obtained through {@code computeNorm(similarity, field, length)}.
 */
public void testSweetSpotComputeNorm() throws IOException {
    final SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
    sweetSpot.setLengthNormFactors(1, 1, 0.5f, true);
    final Similarity classic = new ClassicSimilarity();

    // base case, should degrade: with factors (1, 1) the norms must equal the classic ones exactly
    for (int len = 1; len < 1000; len++) {
        final float expected = computeNorm(classic, "bogus", len);
        final float actual = computeNorm(sweetSpot, "bogus", len);
        assertEquals("base case: i=" + len, expected, actual, 0.0f);
    }

    // make a sweet spot: lengths 3..10 must all yield a norm of exactly 1.0
    sweetSpot.setLengthNormFactors(3, 10, 0.5f, true);
    for (int len = 3; len <= 10; len++) {
        assertEquals("3,10: spot i=" + len, 1.0f, computeNorm(sweetSpot, "bogus", len), 0.0f);
    }
    // from 10 upwards, the norm for length len must match the classic norm for len - 9 (within 0.01)
    for (int len = 10; len < 1000; len++) {
        final float expected = computeNorm(classic, "bogus", len - 9);
        final float actual = computeNorm(sweetSpot, "bogus", len);
        assertEquals("3,10: 10<x : i=" + len, expected, actual, 0.01f);
    }

    // separate sweet spot for certain fields
    final SweetSpotSimilarity barSim = new SweetSpotSimilarity();
    barSim.setLengthNormFactors(8, 13, 0.5f, false);
    final SweetSpotSimilarity yakSim = new SweetSpotSimilarity();
    yakSim.setLengthNormFactors(6, 9, 0.5f, false);
    final SweetSpotSimilarity aSim = new SweetSpotSimilarity();
    aSim.setLengthNormFactors(5, 8, 0.5f, false);
    final SweetSpotSimilarity bSim = new SweetSpotSimilarity();
    bSim.setLengthNormFactors(5, 8, 0.1f, false);

    // "bar", "yak", "a" and "b" get their own similarity; every other field falls back to sweetSpot (3, 10)
    final Similarity perField = new PerFieldSimilarityWrapper() {

        @Override
        public Similarity get(String field) {
            switch (field) {
                case "bar":
                    return barSim;
                case "yak":
                    return yakSim;
                case "a":
                    return aSim;
                case "b":
                    return bSim;
                default:
                    return sweetSpot;
            }
        }
    };

    // "foo" is not special-cased, so it must behave like the (3, 10) similarity above
    for (int len = 3; len <= 10; len++) {
        assertEquals("f: 3,10: spot i=" + len, 1.0f, computeNorm(perField, "foo", len), 0.0f);
    }
    for (int len = 10; len < 1000; len++) {
        final float expected = computeNorm(classic, "foo", len - 9);
        final float actual = computeNorm(perField, "foo", len);
        assertEquals("f: 3,10: 10<x : i=" + len, expected, actual, 0.01f);
    }

    // inside the per-field ranges the norm must be 1.0 (within 0.01)
    for (int len = 8; len <= 13; len++) {
        assertEquals("f: 8,13: spot i=" + len, 1.0f, computeNorm(perField, "bar", len), 0.01f);
    }
    for (int len = 6; len <= 9; len++) {
        assertEquals("f: 6,9: spot i=" + len, 1.0f, computeNorm(perField, "yak", len), 0.01f);
    }

    // past the upper bound, the norm must track the classic norm shifted by (upper bound - 1)
    for (int len = 13; len < 1000; len++) {
        final float expected = computeNorm(classic, "bar", len - 12);
        final float actual = computeNorm(perField, "bar", len);
        assertEquals("f: 8,13: 13<x : i=" + len, expected, actual, 0.01f);
    }
    for (int len = 9; len < 1000; len++) {
        final float expected = computeNorm(classic, "yak", len - 8);
        final float actual = computeNorm(perField, "yak", len);
        assertEquals("f: 6,9: 9<x : i=" + len, expected, actual, 0.01f);
    }

    // "a" and "b" differ only in the third factor (0.5f vs 0.1f); "a" must score strictly lower
    for (int len = 9; len < 1000; len++) {
        final float normA = computeNorm(perField, "a", len);
        final float normB = computeNorm(perField, "b", len);
        assertTrue("s: i=" + len + " : a=" + normA + " < b=" + normB, normA < normB);
    }
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper)

Example 13 with Similarity

use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.

the class SweetSpotSimilarityTest method computeNorm.

/**
 * Indexes a single document whose {@code field} holds {@code length} copies of the
 * token "a" (space separated), explains a term query for "a" against that document
 * using {@code sim}, and returns the value of the "fieldNorm" node of the explanation.
 *
 * The directory, writer and reader are managed with try-with-resources so they are
 * released even when indexing or explaining throws.
 *
 * @param sim    similarity used both for indexing and for searching
 * @param field  name of the field to index and query
 * @param length number of "a" tokens placed in the field
 * @return the value of the "fieldNorm" explanation node
 * @throws IOException if indexing, searching or closing a resource fails
 */
private static float computeNorm(Similarity sim, String field, int length) throws IOException {
    String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
    final Explanation expl;
    try (Directory dir = new RAMDirectory();
         IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim))) {
        w.addDocument(Collections.singleton(newTextField(field, value, Store.NO)));
        // Reader opened straight from the writer; it is closed before the writer and the directory.
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(sim);
            // Only one document was added, so it is doc id 0.
            expl = searcher.explain(new TermQuery(new Term(field, "a")), 0);
        }
    }
    Explanation norm = findExplanation(expl, "fieldNorm");
    assertNotNull(norm);
    return norm.getValue();
}
Also used : IntStream(java.util.stream.IntStream) Explanation(org.apache.lucene.search.Explanation) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) RAMDirectory(org.apache.lucene.store.RAMDirectory) IOException(java.io.IOException) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) Collectors(java.util.stream.Collectors) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) Similarity(org.apache.lucene.search.similarities.Similarity) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Explanation(org.apache.lucene.search.Explanation) Term(org.apache.lucene.index.Term) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 14 with Similarity

use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.

the class TestTaxonomyFacetCounts method testReallyNoNormsForDrillDown.

/**
 * Indexes one document containing a text field named "field" and a facet field,
 * using a per-field similarity wrapper that fails the test if a similarity is ever
 * requested for any field other than "field".
 *
 * NOTE(review): judging by the test name, the intent is that the facet drill-down
 * terms are indexed without norms — confirm against the facets module.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
    final Directory indexDir = newDirectory();
    final Directory taxonomyDir = newDirectory();
    final IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));

    // Hands out a single ClassicSimilarity, but only for the field named "field".
    final Similarity guardedSimilarity = new PerFieldSimilarityWrapper() {

        final Similarity delegate = new ClassicSimilarity();

        @Override
        public Similarity get(String name) {
            assertEquals("field", name);
            return delegate;
        }
    };
    writerConfig.setSimilarity(guardedSimilarity);

    final TaxonomyWriter taxonomyWriter =
        new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
    final FacetsConfig facetsConfig = new FacetsConfig();

    final Document document = new Document();
    document.add(newTextField("field", "text", Field.Store.NO));
    document.add(new FacetField("a", "path"));
    indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

    indexWriter.close();
    IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) FacetsConfig(org.apache.lucene.facet.FacetsConfig) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with Similarity

use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.

the class TestValueSources method testNorm.

/**
 * Exercises {@link NormValueSource} with a {@link ClassicSimilarity} installed on the
 * shared searcher. The searcher's previous similarity is restored afterwards, even if
 * an assertion fails.
 */
public void testNorm() throws Exception {
    final Similarity previous = searcher.getSimilarity(true);
    try {
        searcher.setSimilarity(new ClassicSimilarity());

        // no norm field (so agnostic to indexed similarity): both hits are expected to score 1
        final ValueSource byteNorm = new NormValueSource("byte");
        assertHits(new FunctionQuery(byteNorm), new float[] { 1f, 1f });

        // existence is asserted for both sources, regardless of whether the field has norms
        assertAllExist(byteNorm);
        final ValueSource textNorm = new NormValueSource("text");
        assertAllExist(textNorm);
    } finally {
        searcher.setSimilarity(previous);
    }
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) SumTotalTermFreqValueSource(org.apache.lucene.queries.function.valuesource.SumTotalTermFreqValueSource) DoubleConstValueSource(org.apache.lucene.queries.function.valuesource.DoubleConstValueSource) ConstValueSource(org.apache.lucene.queries.function.valuesource.ConstValueSource) QueryValueSource(org.apache.lucene.queries.function.valuesource.QueryValueSource) DocFreqValueSource(org.apache.lucene.queries.function.valuesource.DocFreqValueSource) NormValueSource(org.apache.lucene.queries.function.valuesource.NormValueSource) NumDocsValueSource(org.apache.lucene.queries.function.valuesource.NumDocsValueSource) MaxDocValueSource(org.apache.lucene.queries.function.valuesource.MaxDocValueSource) JoinDocFreqValueSource(org.apache.lucene.queries.function.valuesource.JoinDocFreqValueSource) LiteralValueSource(org.apache.lucene.queries.function.valuesource.LiteralValueSource) TotalTermFreqValueSource(org.apache.lucene.queries.function.valuesource.TotalTermFreqValueSource) IDFValueSource(org.apache.lucene.queries.function.valuesource.IDFValueSource) TermFreqValueSource(org.apache.lucene.queries.function.valuesource.TermFreqValueSource) TFValueSource(org.apache.lucene.queries.function.valuesource.TFValueSource) NormValueSource(org.apache.lucene.queries.function.valuesource.NormValueSource)

Aggregations

Similarity (org.apache.lucene.search.similarities.Similarity): 48, BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity): 15, ClassicSimilarity (org.apache.lucene.search.similarities.ClassicSimilarity): 15, Directory (org.apache.lucene.store.Directory): 9, PerFieldSimilarityWrapper (org.apache.lucene.search.similarities.PerFieldSimilarityWrapper): 8, SweetSpotSimilarity (org.apache.lucene.misc.SweetSpotSimilarity): 7, IOException (java.io.IOException): 6, Document (org.apache.lucene.document.Document): 5, Term (org.apache.lucene.index.Term): 5, IndexSearcher (org.apache.lucene.search.IndexSearcher): 5, Collectors (java.util.stream.Collectors): 4, IntStream (java.util.stream.IntStream): 4, Field (org.apache.lucene.document.Field): 4, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 4, NormValueSource (org.apache.lucene.queries.function.valuesource.NormValueSource): 4, BytesRef (org.apache.lucene.util.BytesRef): 4, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 3, Store (org.apache.lucene.document.Field.Store): 3, IndexWriter (org.apache.lucene.index.IndexWriter): 3, ConstValueSource (org.apache.lucene.queries.function.valuesource.ConstValueSource): 3