Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
From the class TestMultipleIndexFields, method testDefault.
@Test
public void testDefault() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();
  seedIndex(tw, iw, config);
  IndexReader ir = iw.getReader();
  tw.commit();
  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);
  FacetsCollector sfc = performSearch(tr, ir, searcher);
  // Obtain facets results and hand-test them
  assertCorrectResults(getTaxonomyFacetCounts(tr, config, sfc));
  assertOrdinalsExist("$facets", ir);
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
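
The flow above (one RandomIndexWriter for documents, one DirectoryTaxonomyWriter for the sidecar taxonomy, and a FacetsConfig whose build() call translates FacetFields into taxonomy ordinals) is the standard taxonomy-facet indexing pattern. A minimal standalone sketch of the same flow outside the test harness; the on-disk paths and the class name are placeholders, not part of the test:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TaxonomyIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory indexDir = FSDirectory.open(Paths.get("main-index")); // placeholder path
    Directory taxoDir = FSDirectory.open(Paths.get("taxo-index")); // placeholder path
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new StandardAnalyzer()).setOpenMode(OpenMode.CREATE));
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    // build() rewrites the FacetField into ordinals, registering new category
    // paths with the taxonomy writer as a side effect
    indexWriter.addDocument(config.build(taxoWriter, doc));
    // commit the taxonomy no later than the index, so every ordinal the index
    // references already exists in the taxonomy
    taxoWriter.commit();
    indexWriter.commit();
    indexWriter.close();
    taxoWriter.close();
    indexDir.close();
    taxoDir.close();
  }
}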
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
From the class TestRandomSamplingFacetsCollector, method testRandomSampling.
public void testRandomSampling() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  Random random = random();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  FacetsConfig config = new FacetsConfig();
  final int numCategories = 10;
  int numDocs = atLeast(10000);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
    doc.add(new FacetField("iMod10", Integer.toString(i % numCategories)));
    writer.addDocument(config.build(taxoWriter, doc));
  }
  writer.forceMerge(CHI_SQUARE_VALUES.length - 1);
  // NRT open
  IndexSearcher searcher = newSearcher(writer.getReader());
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  IOUtils.close(writer, taxoWriter);
  // Test empty results
  RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong());
  // There should be no divisions by zero
  searcher.search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);
  // There should be no divisions by zero and no null result
  assertNotNull(collectRandomZeroResults.getMatchingDocs());
  // There should be no results at all
  for (MatchingDocs doc : collectRandomZeroResults.getMatchingDocs()) {
    assertEquals(0, doc.totalHits);
  }
  // Now start searching and retrieve results.
  // Use a query to select half of the documents.
  TermQuery query = new TermQuery(new Term("EvenOdd", "even"));
  // 10% of total docs, 20% of the hits
  RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong());
  FacetsCollector fc = new FacetsCollector();
  searcher.search(query, MultiCollector.wrap(fc, random10Percent));
  final List<MatchingDocs> matchingDocs = random10Percent.getMatchingDocs();
  // count the total hits and sampled docs, also store the number of sampled
  // docs per segment
  int totalSampledDocs = 0, totalHits = 0;
  int[] numSampledDocs = new int[matchingDocs.size()];
  // System.out.println("numSegments=" + numSampledDocs.length);
  for (int i = 0; i < numSampledDocs.length; i++) {
    MatchingDocs md = matchingDocs.get(i);
    final DocIdSetIterator iter = md.bits.iterator();
    while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) ++numSampledDocs[i];
    totalSampledDocs += numSampledDocs[i];
    totalHits += md.totalHits;
  }
  // compute the chi-square value for the sampled documents' distribution
  float chi_square = 0;
  for (int i = 0; i < numSampledDocs.length; i++) {
    MatchingDocs md = matchingDocs.get(i);
    float ei = (float) md.totalHits / totalHits;
    if (ei > 0.0f) {
      float oi = (float) numSampledDocs[i] / totalSampledDocs;
      chi_square += (Math.pow(ei - oi, 2) / ei);
    }
  }
  // Verify that the chi-square value isn't too big. According to
  // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value,
  // we basically verify that there is a really small chance of hitting a very
  // bad sample (p-value < 0.05), for n degrees of freedom. The number 'n' depends
  // on the number of segments.
  assertTrue("chisquare not statistically significant enough: " + chi_square, chi_square < CHI_SQUARE_VALUES[numSampledDocs.length]);
  // Test amortized counts - should be 5X the sampled count, but maximum numDocs/10
  final FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
  final FacetResult random10Result = random10FacetCounts.getTopChildren(10, "iMod10");
  final FacetResult amortized10Result = random10Percent.amortizeFacetCounts(random10Result, config, searcher);
  for (int i = 0; i < amortized10Result.labelValues.length; i++) {
    LabelAndValue amortized = amortized10Result.labelValues[i];
    LabelAndValue sampled = random10Result.labelValues[i];
    // since numDocs may not divide by 10 exactly, allow for some slack in the amortized count
    assertEquals(amortized.value.floatValue(), Math.min(5 * sampled.value.floatValue(), numDocs / 10.f), 1.0);
  }
  IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
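
Two pieces of arithmetic in this test are easy to miss. The per-segment check is the Pearson goodness-of-fit statistic, restated here from the loop above, with e_i the segment's share of the true hits and o_i its share of the sampled docs:

\[ \chi^2 = \sum_{i=1}^{n} \frac{(e_i - o_i)^2}{e_i}, \qquad e_i = \frac{\mathrm{hits}_i}{\sum_j \mathrm{hits}_j}, \qquad o_i = \frac{\mathrm{sampled}_i}{\sum_j \mathrm{sampled}_j}, \]

where n is the number of segments. The amortization factor follows from the sampling rate: the query matches the numDocs/2 even documents while the collector keeps at most numDocs/10 of them, i.e. roughly one hit in five, so amortizeFacetCounts scales each sampled count by about 5; hence the assertion against 5 * sampled, capped at numDocs/10, the true per-category count among the even documents.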
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
From the class TestMultipleIndexFields, method testDifferentFieldsAndText.
@Test
public void testDifferentFieldsAndText() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$bands");
  config.setIndexFieldName("Composer", "$composers");
  seedIndex(tw, iw, config);
  IndexReader ir = iw.getReader();
  tw.commit();
  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);
  FacetsCollector sfc = performSearch(tr, ir, searcher);
  Map<String, Facets> facetsMap = new HashMap<>();
  facetsMap.put("Band", getTaxonomyFacetCounts(tr, config, sfc, "$bands"));
  facetsMap.put("Composer", getTaxonomyFacetCounts(tr, config, sfc, "$composers"));
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
  // Obtain facets results and hand-test them
  assertCorrectResults(facets);
  assertOrdinalsExist("$facets", ir);
  assertOrdinalsExist("$bands", ir);
  assertOrdinalsExist("$composers", ir);
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
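
performSearch and getTaxonomyFacetCounts are helpers from the test superclass. Outside the tests, the equivalent search-side wiring, counting each custom index field separately and stitching the results together with MultiFacets, would look roughly like the sketch below; the method and variable names are assumptions, not part of the test:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.MultiFacets;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

public class MultiFieldFacetsSketch {
  static Facets countPerField(IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config) throws Exception {
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(new IndexSearcher(reader), new MatchAllDocsQuery(), 10, fc);
    Map<String, Facets> byDim = new HashMap<>();
    // dimensions routed to dedicated index fields are counted per field
    byDim.put("Band", new FastTaxonomyFacetCounts("$bands", taxoReader, config, fc));
    byDim.put("Composer", new FastTaxonomyFacetCounts("$composers", taxoReader, config, fc));
    // every other dimension falls back to counts over the default "$facets" field
    return new MultiFacets(byDim, new FastTaxonomyFacetCounts(taxoReader, config, fc));
  }
}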
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
From the class TestTaxonomyFacetSumValueSource, method testSumScoreAggregator.
public void testSumScoreAggregator() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  FacetsConfig config = new FacetsConfig();
  for (int i = atLeast(30); i > 0; --i) {
    Document doc = new Document();
    if (random().nextBoolean()) {
      // don't match all documents
      doc.add(new StringField("f", "v", Field.Store.NO));
    }
    doc.add(new FacetField("dim", "a"));
    iw.addDocument(config.build(taxoWriter, doc));
  }
  DirectoryReader r = DirectoryReader.open(iw);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  FacetsCollector fc = new FacetsCollector(true);
  BoostQuery csq = new BoostQuery(new ConstantScoreQuery(new MatchAllDocsQuery()), 2f);
  TopDocs td = FacetsCollector.search(newSearcher(r), csq, 10, fc);
  Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, DoubleValuesSource.SCORES);
  int expected = (int) (td.getMaxScore() * td.totalHits);
  assertEquals(expected, facets.getSpecificValue("dim", "a").intValue());
  iw.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
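
The expected value is a closed-form restatement of the setup: the boosted constant-score query matches every document with the same score of 2.0, and every document carries the category dim/a, so

\[ \sum_{d \in \mathrm{hits}} \mathrm{score}(d) = 2.0 \cdot \mathrm{totalHits} = \mathrm{maxScore} \cdot \mathrm{totalHits}, \]

which is exactly what the test computes from td.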
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
From the class TestTaxonomyFacetSumValueSource, method testWithScore.
public void testWithScore() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  FacetsConfig config = new FacetsConfig();
  for (int i = 0; i < 4; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("price", (i + 1)));
    doc.add(new FacetField("a", Integer.toString(i % 2)));
    iw.addDocument(config.build(taxoWriter, doc));
  }
  DirectoryReader r = DirectoryReader.open(iw);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  FacetsCollector fc = new FacetsCollector(true);
  // score documents by their 'price' field - makes asserting the correct counts for the categories easier
  Query q = new FunctionQuery(new LongFieldSource("price"));
  FacetsCollector.search(newSearcher(r), q, 10, fc);
  Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, DoubleValuesSource.SCORES);
  assertEquals("dim=a path=[] value=10.0 childCount=2\n  1 (6.0)\n  0 (4.0)\n", facets.getTopChildren(10, "a").toString());
  iw.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
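
The asserted string follows directly from the four documents: FunctionQuery scores document i by its price i + 1, and document i is filed under category i % 2, so

\[ \mathrm{value}(\text{"1"}) = 2 + 4 = 6.0, \qquad \mathrm{value}(\text{"0"}) = 1 + 3 = 4.0, \qquad \mathrm{total} = 10.0, \]

with childCount = 2 and "1" listed first because getTopChildren orders children by descending value.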