Search in sources :

Example 1 with PerFieldSimilarityWrapper

use of org.apache.lucene.search.similarities.PerFieldSimilarityWrapper in project lucene-solr by apache.

the class SweetSpotSimilarityTest method testSweetSpotComputeNorm.

/**
 * Verifies the length norms obtained through {@code computeNorm} for a {@link SweetSpotSimilarity},
 * both when used directly and when selected per field by a {@link PerFieldSimilarityWrapper}.
 *
 * <p>Three properties are checked: with a (1, 1) range the norms equal those of
 * {@link ClassicSimilarity}; for lengths inside a configured range the norm is 1.0; for lengths
 * beyond the range the norm matches the classic norm of the length reduced by (max - 1).
 */
public void testSweetSpotComputeNorm() throws IOException {
    final SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
    sweetSpot.setLengthNormFactors(1, 1, 0.5f, true);
    Similarity classic = new ClassicSimilarity();
    Similarity sweetSpotSim = sweetSpot;

    // Range (1, 1): expected to match the classic norms exactly (delta 0).
    for (int len = 1; len < 1000; len++) {
        assertEquals("base case: i=" + len, computeNorm(classic, "bogus", len), computeNorm(sweetSpotSim, "bogus", len), 0.0f);
    }

    // Range (3, 10): every length inside the range must yield a norm of exactly 1.
    sweetSpot.setLengthNormFactors(3, 10, 0.5f, true);
    for (int len = 3; len <= 10; len++) {
        assertEquals("3,10: spot i=" + len, 1.0f, computeNorm(sweetSpot, "bogus", len), 0.0f);
    }
    // From the upper bound onwards, compare against the classic norm of (len - 9).
    for (int len = 10; len < 1000; len++) {
        final float expected = computeNorm(classic, "bogus", len - 9);
        final float actual = computeNorm(sweetSpotSim, "bogus", len);
        assertEquals("3,10: 10<x : i=" + len, expected, actual, 0.01f);
    }

    // Dedicated similarities, each with its own range, for the fields "bar", "yak", "a" and "b".
    final SweetSpotSimilarity barSim = new SweetSpotSimilarity();
    barSim.setLengthNormFactors(8, 13, 0.5f, false);
    final SweetSpotSimilarity yakSim = new SweetSpotSimilarity();
    yakSim.setLengthNormFactors(6, 9, 0.5f, false);
    final SweetSpotSimilarity aSim = new SweetSpotSimilarity();
    aSim.setLengthNormFactors(5, 8, 0.5f, false);
    final SweetSpotSimilarity bSim = new SweetSpotSimilarity();
    bSim.setLengthNormFactors(5, 8, 0.1f, false);

    // Routes each known field to its own similarity; any other field falls back to sweetSpot (3, 10).
    Similarity perField = new PerFieldSimilarityWrapper() {

        @Override
        public Similarity get(String field) {
            switch (field) {
                case "bar":
                    return barSim;
                case "yak":
                    return yakSim;
                case "a":
                    return aSim;
                case "b":
                    return bSim;
                default:
                    return sweetSpot;
            }
        }
    };

    // "foo" is not mapped explicitly, so it must behave like the (3, 10) similarity above.
    for (int len = 3; len <= 10; len++) {
        assertEquals("f: 3,10: spot i=" + len, 1.0f, computeNorm(perField, "foo", len), 0.0f);
    }
    for (int len = 10; len < 1000; len++) {
        final float expected = computeNorm(classic, "foo", len - 9);
        final float actual = computeNorm(perField, "foo", len);
        assertEquals("f: 3,10: 10<x : i=" + len, expected, actual, 0.01f);
    }

    // Lengths inside the per-field ranges of "bar" (8..13) and "yak" (6..9).
    for (int len = 8; len <= 13; len++) {
        assertEquals("f: 8,13: spot i=" + len, 1.0f, computeNorm(perField, "bar", len), 0.01f);
    }
    for (int len = 6; len <= 9; len++) {
        assertEquals("f: 6,9: spot i=" + len, 1.0f, computeNorm(perField, "yak", len), 0.01f);
    }

    // Lengths from the upper bound onwards for "bar" and "yak", compared to shifted classic norms.
    for (int len = 13; len < 1000; len++) {
        final float expected = computeNorm(classic, "bar", len - 12);
        final float actual = computeNorm(perField, "bar", len);
        assertEquals("f: 8,13: 13<x : i=" + len, expected, actual, 0.01f);
    }
    for (int len = 9; len < 1000; len++) {
        final float expected = computeNorm(classic, "yak", len - 8);
        final float actual = computeNorm(perField, "yak", len);
        assertEquals("f: 6,9: 9<x : i=" + len, expected, actual, 0.01f);
    }

    // "a" and "b" share the range (5, 8) but differ in the third factor (0.5 vs 0.1);
    // past the range the norm for "a" must stay strictly below the norm for "b".
    for (int len = 9; len < 1000; len++) {
        final float normA = computeNorm(perField, "a", len);
        final float normB = computeNorm(perField, "b", len);
        assertTrue("s: i=" + len + " : a=" + normA + " < b=" + normB, normA < normB);
    }
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper)

Example 2 with PerFieldSimilarityWrapper

use of org.apache.lucene.search.similarities.PerFieldSimilarityWrapper in project lucene-solr by apache.

the class TestTaxonomyFacetCounts method testReallyNoNormsForDrillDown.

/**
 * Indexes one document containing a text field named "field" and a {@link FacetField}, using a
 * per-field similarity that asserts every field name it is asked about equals "field".
 *
 * <p>The test therefore fails if a similarity is ever requested for any other field while the
 * document is built and written.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxonomyDir = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));

    // Similarity lookup that rejects every field name other than "field".
    PerFieldSimilarityWrapper fieldCheckingSim = new PerFieldSimilarityWrapper() {

        final Similarity delegate = new ClassicSimilarity();

        @Override
        public Similarity get(String name) {
            assertEquals("field", name);
            return delegate;
        }
    };
    writerConfig.setSimilarity(fieldCheckingSim);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
    FacetsConfig facetsConfig = new FacetsConfig();

    // One document: a plain text field plus a facet field.
    Document document = new Document();
    document.add(newTextField("field", "text", Field.Store.NO));
    document.add(new FacetField("a", "path"));
    indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

    indexWriter.close();
    IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) FacetsConfig(org.apache.lucene.facet.FacetsConfig) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 3 with PerFieldSimilarityWrapper

use of org.apache.lucene.search.similarities.PerFieldSimilarityWrapper in project lucene-solr by apache.

the class BaseSimilarityTestCase method getSimilarity.

/**
 * Returns the similarity in use for the given field.
 *
 * <p>The similarity is read from the core's current searcher. While the result is a
 * {@link PerFieldSimilarityWrapper} it is unwrapped by asking it for the similarity of
 * {@code field}, so the returned instance is never a per-field wrapper.
 *
 * @param field name of the field whose similarity is wanted
 * @return the similarity resolved for {@code field}
 */
protected Similarity getSimilarity(String field) {
    SolrCore core = h.getCore();
    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    Similarity sim;
    try {
        sim = searcher.get().getSimilarity(true);
    } finally {
        // Always give the reference back, even if reading the similarity throws.
        searcher.decref();
    }
    while (sim instanceof PerFieldSimilarityWrapper) {
        sim = ((PerFieldSimilarityWrapper) sim).get(field);
    }
    return sim;
}
Also used : Similarity(org.apache.lucene.search.similarities.Similarity) SolrCore(org.apache.solr.core.SolrCore) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Example 4 with PerFieldSimilarityWrapper

use of org.apache.lucene.search.similarities.PerFieldSimilarityWrapper in project lucene-solr by apache.

the class TestBulkSchemaAPI method assertFieldSimilarity.

/**
 * Whitebox check of the Similarity for the specified field according to
 * {@link SolrCore#getLatestSchema}.
 *
 * <p>Asserts that the schema uses a {@link SchemaSimilarityFactory}, that the schema's similarity is
 * a {@link PerFieldSimilarityWrapper}, and that the similarity it returns for {@code fieldname} is
 * exactly of class {@code expected}. Then executes each of the specified Similarity-accepting
 * validators against that similarity.
 *
 * @param fieldname  name of the field whose similarity is checked
 * @param expected   exact class the field's similarity must have
 * @param validators additional checks, each run with the field's similarity
 */
@SafeVarargs
private static <T extends Similarity> void assertFieldSimilarity(String fieldname, Class<T> expected, Consumer<T>... validators) {
    CoreContainer cc = jetty.getCoreContainer();
    try (SolrCore core = cc.getCore("collection1")) {
        SimilarityFactory simfac = core.getLatestSchema().getSimilarityFactory();
        assertNotNull(simfac);
        assertTrue("test only works with SchemaSimilarityFactory", simfac instanceof SchemaSimilarityFactory);
        Similarity mainSim = core.getLatestSchema().getSimilarity();
        assertNotNull(mainSim);
        // sanity check simfac vs sim in use - also verify inform called on simfac, otherwise exception
        assertEquals(mainSim, simfac.getSimilarity());
        assertTrue("test only works with PerFieldSimilarityWrapper, SchemaSimilarityFactory redefined?", mainSim instanceof PerFieldSimilarityWrapper);
        Similarity fieldSim = ((PerFieldSimilarityWrapper) mainSim).get(fieldname);
        // Fail with a readable message instead of a NullPointerException on getClass() below.
        assertNotNull("no sim for field=" + fieldname, fieldSim);
        assertEquals("wrong sim for field=" + fieldname, expected, fieldSim.getClass());
        // Checked cast through the Class token; safe because the exact class was asserted above.
        final T typedSim = expected.cast(fieldSim);
        for (Consumer<T> validator : validators) {
            validator.accept(typedSim);
        }
    }
}
Also used : CoreContainer(org.apache.solr.core.CoreContainer) SweetSpotSimilarity(org.apache.lucene.misc.SweetSpotSimilarity) Similarity(org.apache.lucene.search.similarities.Similarity) DFISimilarity(org.apache.lucene.search.similarities.DFISimilarity) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) SolrCore(org.apache.solr.core.SolrCore) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) SchemaSimilarityFactory(org.apache.solr.search.similarities.SchemaSimilarityFactory) SimilarityFactory(org.apache.solr.schema.SimilarityFactory) SchemaSimilarityFactory(org.apache.solr.search.similarities.SchemaSimilarityFactory)

Example 5 with PerFieldSimilarityWrapper

use of org.apache.lucene.search.similarities.PerFieldSimilarityWrapper in project lucene-solr by apache.

the class TestDocValuesScoring method testSimple.

/* for comparing floats */
/**
 * Checks that a per-field similarity can apply a doc-values based boost to one field only.
 *
 * <p>Two documents are indexed with the text fields "foo" and "bar" and a float doc-values field
 * "foo_boost" (2 for the first document, 4 for the second). One searcher keeps its base
 * similarity; the other uses a {@link PerFieldSimilarityWrapper} that returns a
 * {@code BoostingSimilarity} over "foo_boost" for "foo" and the base similarity for every other
 * field. Scores on "foo" are expected to be multiplied by the stored boost, scores on "bar" to
 * be unchanged.
 */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    Field field = newTextField("foo", "", Field.Store.NO);
    doc.add(field);
    Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
    doc.add(dvField);
    Field field2 = newTextField("bar", "", Field.Store.NO);
    doc.add(field2);
    // The same Document/Field instances are reused; only their values change between adds.
    field.setStringValue("quick brown fox");
    field2.setStringValue("quick brown fox");
    // boost x2
    dvField.setFloatValue(2f);
    iw.addDocument(doc);
    field.setStringValue("jumps over lazy brown dog");
    field2.setStringValue("jumps over lazy brown dog");
    // boost x4
    dvField.setFloatValue(4f);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    // no boosting
    IndexSearcher searcher1 = newSearcher(ir, false);
    final Similarity base = searcher1.getSimilarity(true);
    // boosting
    IndexSearcher searcher2 = newSearcher(ir, false);
    searcher2.setSimilarity(new PerFieldSimilarityWrapper() {

        final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");

        @Override
        public Similarity get(String field) {
            return "foo".equals(field) ? fooSim : base;
        }
    });
    // in this case, we searched on field "foo". first document should have 2x the score.
    TermQuery tq = new TermQuery(new Term("foo", "quick"));
    QueryUtils.check(random(), tq, searcher1);
    QueryUtils.check(random(), tq, searcher2);
    TopDocs noboost = searcher1.search(tq, 10);
    TopDocs boost = searcher2.search(tq, 10);
    assertEquals(1, noboost.totalHits);
    assertEquals(1, boost.totalHits);
    //System.out.println(searcher2.explain(tq, boost.scoreDocs[0].doc));
    // expected = unboosted score times the stored boost, actual = boosted score
    assertEquals(noboost.scoreDocs[0].score * 2f, boost.scoreDocs[0].score, SCORE_EPSILON);
    // this query matches only the second document, which should have 4x the score.
    tq = new TermQuery(new Term("foo", "jumps"));
    QueryUtils.check(random(), tq, searcher1);
    QueryUtils.check(random(), tq, searcher2);
    noboost = searcher1.search(tq, 10);
    boost = searcher2.search(tq, 10);
    assertEquals(1, noboost.totalHits);
    assertEquals(1, boost.totalHits);
    assertEquals(noboost.scoreDocs[0].score * 4f, boost.scoreDocs[0].score, SCORE_EPSILON);
    // search on field bar just for kicks, nothing should happen, since we set up
    // our sim provider to only use foo_boost for field foo.
    tq = new TermQuery(new Term("bar", "quick"));
    QueryUtils.check(random(), tq, searcher1);
    QueryUtils.check(random(), tq, searcher2);
    noboost = searcher1.search(tq, 10);
    boost = searcher2.search(tq, 10);
    assertEquals(1, noboost.totalHits);
    assertEquals(1, boost.totalHits);
    assertEquals(noboost.scoreDocs[0].score, boost.scoreDocs[0].score, SCORE_EPSILON);
    ir.close();
    dir.close();
}
Also used : Similarity(org.apache.lucene.search.similarities.Similarity) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

PerFieldSimilarityWrapper (org.apache.lucene.search.similarities.PerFieldSimilarityWrapper)6 Similarity (org.apache.lucene.search.similarities.Similarity)5 Document (org.apache.lucene.document.Document)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 Field (org.apache.lucene.document.Field)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 ClassicSimilarity (org.apache.lucene.search.similarities.ClassicSimilarity)2 Directory (org.apache.lucene.store.Directory)2 SolrCore (org.apache.solr.core.SolrCore)2 FloatDocValuesField (org.apache.lucene.document.FloatDocValuesField)1 FacetField (org.apache.lucene.facet.FacetField)1 FacetsConfig (org.apache.lucene.facet.FacetsConfig)1 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)1 IndexReader (org.apache.lucene.index.IndexReader)1 Term (org.apache.lucene.index.Term)1 SweetSpotSimilarity (org.apache.lucene.misc.SweetSpotSimilarity)1 BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity)1 DFISimilarity (org.apache.lucene.search.similarities.DFISimilarity)1 TFIDFSimilarity (org.apache.lucene.search.similarities.TFIDFSimilarity)1