Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
From class TestRangeFacetCounts, method testMixedRangeAndNonRangeTaxonomy:
/** Tests a single request that mixes Range and non-Range
 *  faceting, with DrillSideways and taxonomy. */
public void testMixedRangeAndNonRangeTaxonomy() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Directory td = newDirectory();
  DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
  FacetsConfig config = new FacetsConfig();
  for (long l = 0; l < 100; l++) {
    Document doc = new Document();
    // For computing range facet counts:
    doc.add(new NumericDocValuesField("field", l));
    // For drill down by numeric range:
    doc.add(new LongPoint("field", l));
    if ((l & 3) == 0) {
      doc.add(new FacetField("dim", "a"));
    } else {
      doc.add(new FacetField("dim", "b"));
    }
    w.addDocument(config.build(tw, doc));
  }
  final IndexReader r = w.getReader();
  final TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
  IndexSearcher s = newSearcher(r, false);
  if (VERBOSE) {
    System.out.println("TEST: searcher=" + s);
  }

  DrillSideways ds = new DrillSideways(s, config, tr) {

    @Override
    protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways,
                                       String[] drillSidewaysDims) throws IOException {
      FacetsCollector dimFC = drillDowns;
      FacetsCollector fieldFC = drillDowns;
      if (drillSideways != null) {
        for (int i = 0; i < drillSideways.length; i++) {
          String dim = drillSidewaysDims[i];
          if (dim.equals("field")) {
            fieldFC = drillSideways[i];
          } else {
            dimFC = drillSideways[i];
          }
        }
      }
      Map<String, Facets> byDim = new HashMap<>();
      byDim.put("field",
          new LongRangeFacetCounts("field", fieldFC,
              new LongRange("less than 10", 0L, true, 10L, false),
              new LongRange("less than or equal to 10", 0L, true, 10L, true),
              new LongRange("over 90", 90L, false, 100L, false),
              new LongRange("90 or above", 90L, true, 100L, false),
              new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
      byDim.put("dim", getTaxonomyFacetCounts(taxoReader, config, dimFC));
      return new MultiFacets(byDim, null);
    }

    @Override
    protected boolean scoreSubDocsAtOnce() {
      return random().nextBoolean();
    }
  };

  // First search, no drill downs:
  DrillDownQuery ddq = new DrillDownQuery(config);
  DrillSidewaysResult dsr = ds.search(null, ddq, 10);
  assertEquals(100, dsr.hits.totalHits);
  assertEquals("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n",
      dsr.facets.getTopChildren(10, "dim").toString());
  assertEquals("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n",
      dsr.facets.getTopChildren(10, "field").toString());

  // Second search, drill down on dim=b:
  ddq = new DrillDownQuery(config);
  ddq.add("dim", "b");
  dsr = ds.search(null, ddq, 10);
  assertEquals(75, dsr.hits.totalHits);
  assertEquals("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n",
      dsr.facets.getTopChildren(10, "dim").toString());
  assertEquals("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n",
      dsr.facets.getTopChildren(10, "field").toString());

  // Third search, drill down on "less than or equal to 10":
  ddq = new DrillDownQuery(config);
  ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
  dsr = ds.search(null, ddq, 10);
  assertEquals(11, dsr.hits.totalHits);
  assertEquals("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n",
      dsr.facets.getTopChildren(10, "dim").toString());
  assertEquals("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n",
      dsr.facets.getTopChildren(10, "field").toString());

  w.close();
  IOUtils.close(tw, tr, td, r, d);
}
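
Outside the test harness, the same drill-sideways pattern takes only a few lines. Below is a minimal sketch, assuming an already-open IndexSearcher, TaxonomyReader, and FacetsConfig (the searcher, taxoReader, and config parameters and the "dim"/"b" values are placeholders, not taken from the test above):

import java.io.IOException;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.IndexSearcher;

public class DrillSidewaysSketch {

  /** Drills down on one dimension while still counting that dimension's siblings. */
  static FacetResult countSiblings(IndexSearcher searcher, TaxonomyReader taxoReader,
                                   FacetsConfig config) throws IOException {
    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("dim", "b"); // hits are restricted to dim=b ...
    DrillSidewaysResult result = ds.search(ddq, 10);
    // ... yet the counts for "dim" still include the sibling "a", because
    // DrillSideways also collects the near-misses for each drilled-down dimension.
    return result.facets.getTopChildren(10, "dim");
  }
}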
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
From class TestMultipleIndexFields, method testTwoCustomsSameField:
@Test
public void testTwoCustomsSameField() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$music");
  config.setIndexFieldName("Composer", "$music");
  seedIndex(tw, iw, config);
  IndexReader ir = iw.getReader();
  tw.commit();
  // prepare index reader and taxonomy
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);
  FacetsCollector sfc = performSearch(tr, ir, searcher);
  Map<String, Facets> facetsMap = new HashMap<>();
  Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
  facetsMap.put("Band", facets2);
  facetsMap.put("Composer", facets2);
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
  // Obtain facets results and hand-test them
  assertCorrectResults(facets);
  assertOrdinalsExist("$facets", ir);
  assertOrdinalsExist("$music", ir);
assertOrdinalsExist("$music", ir);
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
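
The getTaxonomyFacetCounts(...) call is a helper from the test's base class; in application code the closest equivalent is FastTaxonomyFacetCounts aimed at the shared index field. A minimal sketch, assuming an open TaxonomyReader and a FacetsCollector already populated by a search (both are placeholder parameters here):

import java.io.IOException;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

public class SharedFieldSketch {

  /** Both dimensions share the "$music" index field, so one counts instance serves both. */
  static Facets countSharedField(TaxonomyReader taxoReader, FacetsCollector fc) throws IOException {
    FacetsConfig config = new FacetsConfig();
    config.setIndexFieldName("Band", "$music");
    config.setIndexFieldName("Composer", "$music");
    // One pass over the "$music" doc values counts ordinals for Band and Composer alike:
    return new FastTaxonomyFacetCounts("$music", taxoReader, config, fc);
  }
}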
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
From class TestMultipleIndexFields, method testDefault:
@Test
public void testDefault() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();
  seedIndex(tw, iw, config);
  IndexReader ir = iw.getReader();
  tw.commit();
  // prepare index reader and taxonomy
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);
  FacetsCollector sfc = performSearch(tr, ir, searcher);
  // Obtain facets results and hand-test them
  assertCorrectResults(getTaxonomyFacetCounts(tr, config, sfc));
  assertOrdinalsExist("$facets", ir);
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
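
With no setIndexFieldName overrides, FacetsConfig stores every dimension's ordinals under its default index field, "$facets" (the FacetsConfig.DEFAULT_INDEX_FIELD_NAME constant), which is what the assertOrdinalsExist check above relies on. A minimal counting sketch against that default field, with taxoReader and fc as placeholder parameters:

import java.io.IOException;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

public class DefaultFieldSketch {

  static Facets countDefaultField(TaxonomyReader taxoReader, FacetsCollector fc) throws IOException {
    FacetsConfig config = new FacetsConfig();
    // No per-dimension overrides: ordinals live in "$facets", so the
    // short-form constructor (which reads that default field) is enough.
    return new FastTaxonomyFacetCounts(taxoReader, config, fc);
  }
}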
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
From class TestRandomSamplingFacetsCollector, method testRandomSampling:
public void testRandomSampling() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  Random random = random();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  FacetsConfig config = new FacetsConfig();
  final int numCategories = 10;
  int numDocs = atLeast(10000);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
    doc.add(new FacetField("iMod10", Integer.toString(i % numCategories)));
    writer.addDocument(config.build(taxoWriter, doc));
  }
  writer.forceMerge(CHI_SQUARE_VALUES.length - 1);

  // NRT open
  IndexSearcher searcher = newSearcher(writer.getReader());
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  IOUtils.close(writer, taxoWriter);

  // Test empty results
  RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong());
  // There should be no divisions by zero
  searcher.search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);
  // There should be no divisions by zero and no null result
  assertNotNull(collectRandomZeroResults.getMatchingDocs());
  // There should be no results at all
  for (MatchingDocs doc : collectRandomZeroResults.getMatchingDocs()) {
    assertEquals(0, doc.totalHits);
  }

  // Now start searching and retrieve results.
  // Use a query to select half of the documents.
  TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

  // 10% of total docs, 20% of the hits
  RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong());
  FacetsCollector fc = new FacetsCollector();
  searcher.search(query, MultiCollector.wrap(fc, random10Percent));
  final List<MatchingDocs> matchingDocs = random10Percent.getMatchingDocs();

  // count the total hits and sampled docs, also store the number of sampled
  // docs per segment
  int totalSampledDocs = 0, totalHits = 0;
  int[] numSampledDocs = new int[matchingDocs.size()];
  // System.out.println("numSegments=" + numSampledDocs.length);
  for (int i = 0; i < numSampledDocs.length; i++) {
    MatchingDocs md = matchingDocs.get(i);
    final DocIdSetIterator iter = md.bits.iterator();
    while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      ++numSampledDocs[i];
    }
    totalSampledDocs += numSampledDocs[i];
    totalHits += md.totalHits;
  }

  // Compute the chi-square value for the sampled documents' distribution:
  // chi^2 = sum_i (e_i - o_i)^2 / e_i, where e_i = hits_i / totalHits is the
  // expected share of segment i and o_i = sampledDocs_i / totalSampledDocs is
  // the observed share.
  float chi_square = 0;
  for (int i = 0; i < numSampledDocs.length; i++) {
    MatchingDocs md = matchingDocs.get(i);
    float ei = (float) md.totalHits / totalHits;
    if (ei > 0.0f) {
      float oi = (float) numSampledDocs[i] / totalSampledDocs;
      chi_square += (Math.pow(ei - oi, 2) / ei);
    }
  }

  // Verify that the chi-square value isn't too big. According to
  // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value,
  // we basically verify that there is a really small chance of hitting a very
  // bad sample (p-value < 0.05), for n degrees of freedom. The number 'n' depends
  // on the number of segments.
  assertTrue("chisquare not statistically significant enough: " + chi_square,
      chi_square < CHI_SQUARE_VALUES[numSampledDocs.length]);

  // Test amortized counts - should be 5X the sampled count, but at most numDocs/10
  final FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
  final FacetResult random10Result = random10FacetCounts.getTopChildren(10, "iMod10");
  final FacetResult amortized10Result = random10Percent.amortizeFacetCounts(random10Result, config, searcher);
  for (int i = 0; i < amortized10Result.labelValues.length; i++) {
    LabelAndValue amortized = amortized10Result.labelValues[i];
    LabelAndValue sampled = random10Result.labelValues[i];
    // since numDocs may not divide by 10 exactly, allow for some slack in the amortized count
    assertEquals(amortized.value.floatValue(), Math.min(5 * sampled.value.floatValue(), numDocs / 10.f), 1.0);
  }
  IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
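
The workflow this test exercises is: collect a random sample of the hits, count facets over just the sample, then scale the counts back up with amortizeFacetCounts. A minimal sketch, assuming an open IndexSearcher, TaxonomyReader, FacetsConfig, and some Query (all placeholder parameters), and reusing the test's "iMod10" dimension:

import java.io.IOException;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.RandomSamplingFacetsCollector;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class SampledFacetsSketch {

  static FacetResult sampledTopChildren(IndexSearcher searcher, TaxonomyReader taxoReader,
                                        FacetsConfig config, Query query) throws IOException {
    // Keep at most 1000 hits, chosen at random; a fixed seed makes runs reproducible.
    RandomSamplingFacetsCollector sampling = new RandomSamplingFacetsCollector(1000, 42L);
    searcher.search(query, sampling);
    // Count facets over the sample only:
    FastTaxonomyFacetCounts counts = new FastTaxonomyFacetCounts(taxoReader, config, sampling);
    FacetResult sampled = counts.getTopChildren(10, "iMod10");
    // Scale the sampled counts back toward what a full count would have returned:
    return sampling.amortizeFacetCounts(sampled, config, searcher);
  }
}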
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
From class TestMultipleIndexFields, method testDifferentFieldsAndText:
@Test
public void testDifferentFieldsAndText() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$bands");
  config.setIndexFieldName("Composer", "$composers");
  seedIndex(tw, iw, config);
  IndexReader ir = iw.getReader();
  tw.commit();
  // prepare index reader and taxonomy
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);
  FacetsCollector sfc = performSearch(tr, ir, searcher);
  Map<String, Facets> facetsMap = new HashMap<>();
  facetsMap.put("Band", getTaxonomyFacetCounts(tr, config, sfc, "$bands"));
  facetsMap.put("Composer", getTaxonomyFacetCounts(tr, config, sfc, "$composers"));
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
  // Obtain facets results and hand-test them
  assertCorrectResults(facets);
  assertOrdinalsExist("$facets", ir);
  assertOrdinalsExist("$bands", ir);
  assertOrdinalsExist("$composers", ir);
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
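
Giving each dimension its own index field means each one can be counted independently, reading only its own doc-values field, with MultiFacets dispatching per-dimension requests. A minimal sketch, with taxoReader and fc as placeholder parameters:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.MultiFacets;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

public class PerFieldFacetsSketch {

  static Facets perFieldCounts(TaxonomyReader taxoReader, FacetsCollector fc) throws IOException {
    FacetsConfig config = new FacetsConfig();
    config.setIndexFieldName("Band", "$bands");
    config.setIndexFieldName("Composer", "$composers");
    Map<String, Facets> byDim = new HashMap<>();
    byDim.put("Band", new FastTaxonomyFacetCounts("$bands", taxoReader, config, fc));
    byDim.put("Composer", new FastTaxonomyFacetCounts("$composers", taxoReader, config, fc));
    // Requests for dimensions not in the map fall through to the default Facets:
    return new MultiFacets(byDim, new FastTaxonomyFacetCounts(taxoReader, config, fc));
  }
}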