Search in sources :

Example 1 with TaxonomyReader

use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.

the class LuceneNativeFacet method facetsWithSearch.

/**
   * User runs a query and counts facets.
   */
private List<FacetResult> facetsWithSearch() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    FacetsCollector fc = new FacetsCollector();
    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    // Retrieve results
    List<FacetResult> results = new ArrayList<FacetResult>();
    // Count both "Publish Date" and "Author" dimensions
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    results.add(facets.getTopChildren(10, "Author"));
    results.add(facets.getTopChildren(10, "Publish Date"));
    indexReader.close();
    taxoReader.close();
    return results;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) ArrayList(java.util.ArrayList) FacetResult(org.apache.lucene.facet.FacetResult) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 2 with TaxonomyReader

use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.

the class LuceneNativeFacet method drillSideways.

/**
   * User drills down on 'Publish Date/2010', and we return facets for both 'Publish Date' and 'Author', using DrillSideways.
   */
private List<FacetResult> drillSideways() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
    DrillSidewaysResult result = ds.search(q, 10);
    // Retrieve results
    List<FacetResult> facets = result.facets.getAllDims(10);
    indexReader.close();
    taxoReader.close();
    return facets;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) DrillSideways(org.apache.lucene.facet.DrillSideways) FacetResult(org.apache.lucene.facet.FacetResult) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Example 3 with TaxonomyReader

use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.

the class LuceneNativeFacet method facetsOnly.

/**
   * User runs a query and counts facets only without collecting the matching documents.
   */
private List<FacetResult> facetsOnly() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    FacetsCollector fc = new FacetsCollector();
    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    searcher.search(new MatchAllDocsQuery(), null, /* Filter */
    fc);
    // Retrieve results
    List<FacetResult> results = new ArrayList<FacetResult>();
    // Count both "Publish Date" and "Author" dimensions
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    results.add(facets.getTopChildren(10, "Author"));
    results.add(facets.getTopChildren(10, "Publish Date"));
    indexReader.close();
    taxoReader.close();
    return results;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) ArrayList(java.util.ArrayList) FacetResult(org.apache.lucene.facet.FacetResult) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 4 with TaxonomyReader

use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.

the class LuceneNativeFacet method drillDown.

/**
   * User drills down on 'Publish Date/2010', and we return facets for 'Author'
   */
private FacetResult drillDown() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, q, 10, fc);
    // Retrieve results
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    FacetResult result = facets.getTopChildren(10, "Author");
    indexReader.close();
    taxoReader.close();
    return result;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) FacetResult(org.apache.lucene.facet.FacetResult) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 5 with TaxonomyReader

use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.

the class TestDrillSideways method testBasic.

public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("Publish Date", true);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    writer.addDocument(config.build(taxoWriter, doc));
    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    writer.addDocument(config.build(taxoWriter, doc));
    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    //System.out.println("searcher=" + searcher);
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
    //  case: drill-down on a single field; in this
    // case the drill-sideways + drill-down counts ==
    // drill-down of just the query:
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    DrillSidewaysResult r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Same simple case, but no baseQuery (pure browse):
    // drill-down on a single field; in this case the
    // drill-sideways + drill-down counts == drill-down of
    // just the query:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Another simple case: drill-down on single fields
    // but OR of two values
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(3, r.hits.totalHits);
    // Publish Date is only drill-down: Lisa and Bob
    // (drill-down) published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    assertTrue(r.facets instanceof MultiFacets);
    List<FacetResult> allResults = r.facets.getAllDims(10);
    assertEquals(2, allResults.size());
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", allResults.get(0).toString());
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", allResults.get(1).toString());
    // More interesting case: drill-down on two fields
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    r = ds.search(null, ddq, 10);
    assertEquals(1, r.hits.totalHits);
    // Publish Date is drill-sideways + drill-down: Lisa
    // (drill-down) published once in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Even more interesting case: drill down on two fields,
    // but one of them is OR
    ddq = new DrillDownQuery(config);
    // Drill down on Lisa or Bob:
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is both drill-sideways + drill-down:
    // Lisa or Bob published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // Test drilling down on invalid field:
    ddq = new DrillDownQuery(config);
    ddq.add("Foobar", "Baz");
    r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Foobar"));
    // Test drilling down on valid term or'd with invalid term:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
    // LUCENE-4915: test drilling down on a dimension but
    // NOT facet counting it:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Test main query gets null scorer:
    ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Author"));
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Aggregations

TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)33 IndexSearcher (org.apache.lucene.search.IndexSearcher)26 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)25 Directory (org.apache.lucene.store.Directory)20 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)13 DirectoryReader (org.apache.lucene.index.DirectoryReader)13 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)13 FacetResult (org.apache.lucene.facet.FacetResult)12 Facets (org.apache.lucene.facet.Facets)11 FacetsCollector (org.apache.lucene.facet.FacetsCollector)11 Test (org.junit.Test)11 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)9 DrillSidewaysResult (org.apache.lucene.facet.DrillSideways.DrillSidewaysResult)9 Document (org.apache.lucene.document.Document)8 FastTaxonomyFacetCounts (org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts)8 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)8 ArrayList (java.util.ArrayList)7 DrillDownQuery (org.apache.lucene.facet.DrillDownQuery)7 FacetLabel (org.apache.lucene.facet.taxonomy.FacetLabel)7 IndexReader (org.apache.lucene.index.IndexReader)7