Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.
Class LuceneNativeFacet, method facetsWithSearch.
/**
 * Runs a match-all query (collecting up to 10 hits) and counts facets for the
 * "Author" and "Publish Date" dimensions.
 *
 * <p>The index reader and the taxonomy reader are opened in a try-with-resources
 * statement so that both are closed even when searching or facet counting throws.
 *
 * @return a two-element list: the top 10 children of "Author" followed by the
 *         top 10 children of "Publish Date"
 * @throws IOException if a reader cannot be opened, searched or closed
 */
private List<FacetResult> facetsWithSearch() throws IOException {
  try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
      TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
    IndexSearcher searcher = new IndexSearcher(indexReader);
    FacetsCollector fc = new FacetsCollector();
    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    // Count both "Publish Date" and "Author" dimensions
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    // Retrieve results; they must be materialised before the readers are closed
    List<FacetResult> results = new ArrayList<>();
    results.add(facets.getTopChildren(10, "Author"));
    results.add(facets.getTopChildren(10, "Publish Date"));
    return results;
  }
}
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.
Class LuceneNativeFacet, method drillSideways.
/**
 * User drills down on 'Publish Date/2010', and we return facets for every
 * dimension (here 'Publish Date' and 'Author') using DrillSideways.
 *
 * <p>The index reader and the taxonomy reader are opened in a try-with-resources
 * statement so that both are closed even when the drill-sideways search throws.
 *
 * @return the result of {@code getAllDims(10)} on the drill-sideways facets
 * @throws IOException if a reader cannot be opened, searched or closed
 */
private List<FacetResult> drillSideways() throws IOException {
  try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
      TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
    IndexSearcher searcher = new IndexSearcher(indexReader);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
    DrillSidewaysResult result = ds.search(q, 10);
    // Retrieve results while the readers are still open
    return result.facets.getAllDims(10);
  }
}
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.
Class LuceneNativeFacet, method facetsOnly.
/**
 * Runs a match-all query and counts facets only, without collecting the matching
 * documents: the {@link FacetsCollector} is passed to the searcher as the sole
 * collector.
 *
 * <p>The index reader and the taxonomy reader are opened in a try-with-resources
 * statement so that both are closed even when searching or facet counting throws.
 *
 * @return a two-element list: the top 10 children of "Author" followed by the
 *         top 10 children of "Publish Date"
 * @throws IOException if a reader cannot be opened, searched or closed
 */
private List<FacetResult> facetsOnly() throws IOException {
  try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
      TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
    IndexSearcher searcher = new IndexSearcher(indexReader);
    FacetsCollector fc = new FacetsCollector();
    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query.
    // No filter was ever applied here (the Filter argument was null), so call
    // the plain (Query, Collector) overload rather than the Filter-based one.
    searcher.search(new MatchAllDocsQuery(), fc);
    // Count both "Publish Date" and "Author" dimensions
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    // Retrieve results; they must be materialised before the readers are closed
    List<FacetResult> results = new ArrayList<>();
    results.add(facets.getTopChildren(10, "Author"));
    results.add(facets.getTopChildren(10, "Publish Date"));
    return results;
  }
}
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project orientdb by orientechnologies.
Class LuceneNativeFacet, method drillDown.
/**
 * User drills down on 'Publish Date/2010', and we return facets for 'Author'.
 *
 * <p>The index reader and the taxonomy reader are opened in a try-with-resources
 * statement so that both are closed even when searching or facet counting throws.
 *
 * @return the top 10 children of the "Author" dimension among the documents
 *         matching the drill-down query
 * @throws IOException if a reader cannot be opened, searched or closed
 */
private FacetResult drillDown() throws IOException {
  try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
      TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
    IndexSearcher searcher = new IndexSearcher(indexReader);
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(config);
    // Now user drills down on Publish Date/2010:
    q.add("Publish Date", "2010");
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, q, 10, fc);
    // Retrieve results while the readers are still open
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    return facets.getTopChildren(10, "Author");
  }
}
Use of org.apache.lucene.facet.taxonomy.TaxonomyReader in project lucene-solr by apache.
Class TestDrillSideways, method testBasic.
/**
 * Indexes five documents carrying an "Author" facet and a hierarchical
 * "Publish Date" facet, then runs a series of DrillSideways searches and
 * asserts, for each one, the total hit count and the string form of the
 * per-dimension facet results (or that the result is null when nothing matches).
 */
public void testBasic() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
// "Publish Date" paths have several components (year, month, day below):
config.setHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
// Fixture: Bob (2010), Lisa (2010), Lisa (2012), Susan (2012), Frank (1999)
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010", "10", "20"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5"));
writer.addDocument(config.build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
//System.out.println("searcher=" + searcher);
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
// case: drill-down on a single field; in this
// case the drill-sideways + drill-down counts ==
// drill-down of just the query:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
DrillSidewaysResult r = ds.search(null, ddq, 10);
assertEquals(2, r.hits.totalHits);
// NOTE(review): the expected strings in this test show a single space before
// each child label; confirm that matches FacetResult.toString() for the Lucene
// version under test (whitespace may have been collapsed in this listing).
// Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010:
assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob
// published once:
assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
// Same simple case, but no baseQuery (pure browse):
// drill-down on a single field; in this case the
// drill-sideways + drill-down counts == drill-down of
// just the query:
ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
r = ds.search(null, ddq, 10);
assertEquals(2, r.hits.totalHits);
// Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010:
assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob
// published once:
assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
// Another simple case: drill-down on single fields
// but OR of two values
ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
ddq.add("Author", "Bob");
r = ds.search(null, ddq, 10);
assertEquals(3, r.hits.totalHits);
// Publish Date is only drill-down: Lisa and Bob
// (drill-down) published twice in 2010 and once in 2012:
assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob
// published once:
assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
// The combined result type is MultiFacets, and getAllDims reports both
// dimensions, Author first and Publish Date second:
assertTrue(r.facets instanceof MultiFacets);
List<FacetResult> allResults = r.facets.getAllDims(10);
assertEquals(2, allResults.size());
assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", allResults.get(0).toString());
assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", allResults.get(1).toString());
// More interesting case: drill-down on two fields
ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
ddq.add("Publish Date", "2010");
r = ds.search(null, ddq, 10);
assertEquals(1, r.hits.totalHits);
// Publish Date is drill-sideways + drill-down: Lisa
// (drill-down) published once in 2010 and once in 2012:
assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down:
// only Lisa & Bob published (once each) in 2010:
assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
// Even more interesting case: drill down on two fields,
// but one of them is OR
ddq = new DrillDownQuery(config);
// Drill down on Lisa or Bob:
ddq.add("Author", "Lisa");
ddq.add("Publish Date", "2010");
ddq.add("Author", "Bob");
r = ds.search(null, ddq, 10);
assertEquals(2, r.hits.totalHits);
// Publish Date is both drill-sideways + drill-down:
// Lisa or Bob published twice in 2010 and once in 2012:
assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down:
// only Lisa & Bob published (once each) in 2010:
assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
// Test drilling down on invalid field: no hits, and the facet
// results for both the real and the invalid dimension are null:
ddq = new DrillDownQuery(config);
ddq.add("Foobar", "Baz");
r = ds.search(null, ddq, 10);
assertEquals(0, r.hits.totalHits);
assertNull(r.facets.getTopChildren(10, "Publish Date"));
assertNull(r.facets.getTopChildren(10, "Foobar"));
// Test drilling down on valid term or'd with invalid term:
// ("Tom" is not an author of any indexed document)
ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
ddq.add("Author", "Tom");
r = ds.search(null, ddq, 10);
assertEquals(2, r.hits.totalHits);
// Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010:
assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Author is drill-sideways + drill-down: Lisa
// (drill-down) published twice, and Frank/Susan/Bob
// published once:
assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
// LUCENE-4915: test drilling down on a dimension but
// NOT facet counting it (only Publish Date is requested below):
ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
ddq.add("Author", "Tom");
r = ds.search(null, ddq, 10);
assertEquals(2, r.hits.totalHits);
// Publish Date is only drill-down, and Lisa published
// one in 2012 and one in 2010:
assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
// Test main query gets null scorer:
// (the base TermQuery is on field "foobar", which no document was indexed with)
ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
ddq.add("Author", "Lisa");
r = ds.search(null, ddq, 10);
assertEquals(0, r.hits.totalHits);
assertNull(r.facets.getTopChildren(10, "Publish Date"));
assertNull(r.facets.getTopChildren(10, "Author"));
// Close the writer, then the index reader, taxonomy reader/writer and both directories
writer.close();
IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}
Aggregations