Search in sources :

Example 6 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestOrdinalMappingLeafReader method verifyResults.

/**
 * Verifies the facet counts and binary doc values of the index in {@code indexDir},
 * resolving facet labels through the taxonomy in {@code taxoDir}.
 *
 * <p>Asserts that each of the top 10 "tag" children has a count of {@code NUM_DOCS},
 * that "id" has {@code NUM_DOCS} children with a total value of {@code NUM_DOCS * 2},
 * and that for every document the "cbdv" value parses to twice the "bdv" value.
 *
 * <p>Both readers are opened in a try-with-resources statement so they are closed
 * even when an assertion fails or an I/O error occurs part-way through.
 *
 * @param indexDir directory holding the search index
 * @param taxoDir directory holding the taxonomy index
 * @throws IOException if opening, reading or closing either index fails
 */
private void verifyResults(Directory indexDir, Directory taxoDir) throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
        DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
        IndexSearcher searcher = newSearcher(indexReader);
        FacetsCollector collector = new FacetsCollector();
        FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
        // tag facets
        Facets tagFacets = new FastTaxonomyFacetCounts("$tags", taxoReader, facetConfig, collector);
        FacetResult result = tagFacets.getTopChildren(10, "tag");
        for (LabelAndValue lv : result.labelValues) {
            if (VERBOSE) {
                System.out.println(lv);
            }
            assertEquals(NUM_DOCS, lv.value.intValue());
        }
        // id facets
        Facets idFacets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, collector);
        FacetResult idResult = idFacets.getTopChildren(10, "id");
        assertEquals(NUM_DOCS, idResult.childCount);
        // each "id" appears twice
        assertEquals(NUM_DOCS * 2, idResult.value);
        BinaryDocValues bdv = MultiDocValues.getBinaryValues(indexReader, "bdv");
        BinaryDocValues cbdv = MultiDocValues.getBinaryValues(indexReader, "cbdv");
        final int maxDoc = indexReader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            // Both iterators must land on doc i, i.e. every document carries both fields.
            assertEquals(i, bdv.nextDoc());
            assertEquals(i, cbdv.nextDoc());
            assertEquals(Integer.parseInt(cbdv.binaryValue().utf8ToString()), Integer.parseInt(bdv.binaryValue().utf8ToString()) * 2);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetResult(org.apache.lucene.facet.FacetResult) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LabelAndValue(org.apache.lucene.facet.LabelAndValue) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) FacetsCollector(org.apache.lucene.facet.FacetsCollector)

Example 7 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestSearcherTaxonomyManager method testDirectory.

/**
 * Repeatedly acquires a searcher/taxonomy pair from a {@code SearcherTaxonomyManager}
 * opened on directories while an {@code IndexerThread} is running, and checks that
 * whenever the acquired searcher sees at least one document, the "field" dimension
 * reports at least one child.
 *
 * @throws Exception if indexing, searching or closing any resource fails
 */
public void testDirectory() throws Exception {
    final Directory indexDirectory = newDirectory();
    final Directory taxoDirectory = newDirectory();
    final MockAnalyzer analyzer = new MockAnalyzer(random());
    final IndexWriter indexWriter = new IndexWriter(indexDirectory, newIndexWriterConfig(analyzer));
    final DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDirectory);

    // Commit both (still empty) indexes before the manager is opened on the directories.
    indexWriter.commit();
    taxoWriter.commit();

    final SearcherTaxonomyManager manager = new SearcherTaxonomyManager(indexDirectory, taxoDirectory, null);
    final FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setMultiValued("field", true);

    final AtomicBoolean stopRequested = new AtomicBoolean();
    // Number of unique facets to index before stopping.
    final int maxOrdinals = TEST_NIGHTLY ? 100000 : 6000;

    final Thread indexerThread = new IndexerThread(indexWriter, facetsConfig, taxoWriter, manager, maxOrdinals, stopRequested);
    indexerThread.start();
    try {
        // Keep searching until the stop flag is raised
        // (presumably by the indexer thread; its code is not shown here).
        while (stopRequested.get() == false) {
            final SearcherAndTaxonomy acquired = manager.acquire();
            try {
                final FacetsCollector facetsCollector = new FacetsCollector();
                acquired.searcher.search(new MatchAllDocsQuery(), facetsCollector);
                final Facets counts = getTaxonomyFacetCounts(acquired.taxonomyReader, facetsConfig, facetsCollector);
                final FacetResult fieldResult = counts.getTopChildren(10, "field");
                final int visibleDocs = acquired.searcher.getIndexReader().numDocs();
                if (visibleDocs > 0) {
                    assertTrue(fieldResult.childCount > 0);
                    assertTrue(fieldResult.labelValues.length > 0);
                }
            } finally {
                // Always hand the pair back, even when an assertion above failed.
                manager.release(acquired);
            }
        }
    } finally {
        indexerThread.join();
    }

    if (VERBOSE) {
        System.out.println("TEST: now stop");
    }

    indexWriter.close();
    IOUtils.close(manager, taxoWriter, taxoDirectory, indexDirectory);
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) Facets(org.apache.lucene.facet.Facets) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FacetsCollector(org.apache.lucene.facet.FacetsCollector) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) FacetResult(org.apache.lucene.facet.FacetResult) SearcherAndTaxonomy(org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager.SearcherAndTaxonomy) Directory(org.apache.lucene.store.Directory)

Example 8 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestRangeFacetCounts method testCustomDoubleValuesSource.

/**
 * Exercises {@code DoubleRangeFacetCounts} with a hand-written {@code DoubleValuesSource}
 * whose value for a document is its doc id plus one, first directly, then through a
 * drill-down query and finally through {@code DrillSideways}.
 *
 * <p>Three empty documents are merged into one segment, so the source yields 1, 2 and 3.
 * A fast-match filter is randomly either supplied or left {@code null}; when supplied,
 * the test also checks that it was actually used.
 *
 * @throws Exception if indexing, searching or closing any resource fails
 */
public void testCustomDoubleValuesSource() throws Exception {
    final Directory directory = newDirectory();
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

    // Index the same empty document three times.
    final Document emptyDocument = new Document();
    for (int i = 0; i < 3; i++) {
        indexWriter.addDocument(emptyDocument);
    }
    // Test wants 3 docs in one segment:
    indexWriter.forceMerge(1);

    final DoubleValuesSource valueSource = new DoubleValuesSource() {

        @Override
        public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
            return new DoubleValues() {

                // Last doc id passed to advanceExact; -1 until the first call.
                int currentDoc = -1;

                @Override
                public double doubleValue() throws IOException {
                    return currentDoc + 1;
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    currentDoc = target;
                    return true;
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
            return Explanation.match(docId + 1, "");
        }
    };

    final FacetsCollector facetsCollector = new FacetsCollector();
    final IndexReader reader = indexWriter.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    searcher.search(new MatchAllDocsQuery(), facetsCollector);

    // All ranges start at 0.0 (inclusive) and differ only in their exclusive upper bound.
    final DoubleRange[] ranges = new DoubleRange[] {
        new DoubleRange("< 1", 0.0, true, 1.0, false),
        new DoubleRange("< 2", 0.0, true, 2.0, false),
        new DoubleRange("< 5", 0.0, true, 5.0, false),
        new DoubleRange("< 10", 0.0, true, 10.0, false),
        new DoubleRange("< 20", 0.0, true, 20.0, false),
        new DoubleRange("< 50", 0.0, true, 50.0, false)
    };

    // Expected rendering of the "field" dimension, shared by the plain and drill-sideways checks.
    final String expectedCounts = "dim=field path=[] value=3 childCount=6\n"
        + "  < 1 (0)\n"
        + "  < 2 (1)\n"
        + "  < 5 (3)\n"
        + "  < 10 (3)\n"
        + "  < 20 (3)\n"
        + "  < 50 (3)\n";

    final AtomicBoolean filterWasUsed = new AtomicBoolean();
    // Sort of silly: when a filter is chosen it simply matches all documents.
    final Query fastMatchFilter = random().nextBoolean()
        ? new UsedQuery(new MatchAllDocsQuery(), filterWasUsed)
        : null;
    if (VERBOSE) {
        System.out.println("TEST: fastMatchFilter=" + fastMatchFilter);
    }

    final Facets rangeCounts = new DoubleRangeFacetCounts("field", valueSource, facetsCollector, fastMatchFilter, ranges);
    assertEquals(expectedCounts, rangeCounts.getTopChildren(10, "field").toString());
    if (fastMatchFilter != null) {
        assertTrue(filterWasUsed.get());
    }

    final FacetsConfig facetsConfig = new FacetsConfig();
    final DrillDownQuery drillDown = new DrillDownQuery(facetsConfig);
    drillDown.add("field", ranges[1].getQuery(fastMatchFilter, valueSource));

    // Test simple drill-down:
    assertEquals(1, searcher.search(drillDown, 10).totalHits);

    // Test drill-sideways after drill-down
    final DrillSideways sideways = new DrillSideways(searcher, facetsConfig, (TaxonomyReader) null) {

        @Override
        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
            assert drillSideways.length == 1;
            return new DoubleRangeFacetCounts("field", valueSource, drillSideways[0], fastMatchFilter, ranges);
        }

        @Override
        protected boolean scoreSubDocsAtOnce() {
            return random().nextBoolean();
        }
    };
    final DrillSidewaysResult sidewaysResult = sideways.search(drillDown, 10);
    assertEquals(1, sidewaysResult.hits.totalHits);
    assertEquals(expectedCounts, sidewaysResult.facets.getTopChildren(10, "field").toString());

    indexWriter.close();
    IOUtils.close(reader, directory);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FacetsConfig(org.apache.lucene.facet.FacetsConfig) Query(org.apache.lucene.search.Query) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) MultiFacets(org.apache.lucene.facet.MultiFacets) Facets(org.apache.lucene.facet.Facets) Explanation(org.apache.lucene.search.Explanation) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) DoubleValues(org.apache.lucene.search.DoubleValues) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DoubleValuesSource(org.apache.lucene.search.DoubleValuesSource) FacetsCollector(org.apache.lucene.facet.FacetsCollector) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DrillSideways(org.apache.lucene.facet.DrillSideways) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 9 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestSortedSetDocValuesFacets method testRandom.

/**
 * Randomized check of sorted-set doc-values faceting: indexes random documents with up to
 * seven facet dimensions, then repeatedly searches for a random "content" token and
 * compares {@code getAllDims(10)} with counts computed by a straightforward in-memory tally.
 *
 * <p>The facets are computed with the concurrent implementation when
 * {@code randomExecutorServiceOrNull()} supplies an executor, and with the plain
 * implementation otherwise.
 *
 * @throws Exception if indexing, searching or closing any resource fails
 */
public void testRandom() throws Exception {
    final String[] tokens = getRandomTokens(10);
    final Directory indexDir = newDirectory();
    final Directory taxoDir = newDirectory();
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);
    final FacetsConfig facetsConfig = new FacetsConfig();
    final int numDocs = atLeast(1000);
    final int numDims = TestUtil.nextInt(random(), 1, 7);
    final List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);

    // Index every generated doc, attaching one facet field per non-null dimension.
    for (TestDoc source : testDocs) {
        final Document indexed = new Document();
        indexed.add(newStringField("content", source.content, Field.Store.NO));
        for (int dim = 0; dim < numDims; dim++) {
            final String label = source.dims[dim];
            if (label == null) {
                continue;
            }
            indexed.add(new SortedSetDocValuesFacetField("dim" + dim, label));
        }
        indexWriter.addDocument(facetsConfig.build(indexed));
    }

    // NRT open
    final IndexSearcher searcher = newSearcher(indexWriter.getReader());
    // Per-top-reader state:
    final SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
    final ExecutorService exec = randomExecutorServiceOrNull();

    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
        final String searchToken = tokens[random().nextInt(tokens.length)];
        if (VERBOSE) {
            System.out.println("\nTEST: iter content=" + searchToken);
        }

        final FacetsCollector facetsCollector = new FacetsCollector();
        FacetsCollector.search(searcher, new TermQuery(new Term("content", searchToken)), 10, facetsCollector);
        final Facets facets = exec == null
            ? new SortedSetDocValuesFacetCounts(state, facetsCollector)
            : new ConcurrentSortedSetDocValuesFacetCounts(state, facetsCollector, exec);

        // Slow, yet hopefully bug-free, faceting:
        @SuppressWarnings({ "rawtypes", "unchecked" }) Map<String, Integer>[] expectedCounts = new HashMap[numDims];
        for (int dim = 0; dim < numDims; dim++) {
            expectedCounts[dim] = new HashMap<>();
        }
        for (TestDoc candidate : testDocs) {
            if (!candidate.content.equals(searchToken)) {
                continue;
            }
            for (int dim = 0; dim < numDims; dim++) {
                final String label = candidate.dims[dim];
                if (label == null) {
                    continue;
                }
                final Integer previous = expectedCounts[dim].get(label);
                expectedCounts[dim].put(label, previous == null ? 1 : previous.intValue() + 1);
            }
        }

        // Turn the per-dimension tallies into FacetResults, skipping dimensions with no hits.
        final List<FacetResult> expected = new ArrayList<>();
        for (int dim = 0; dim < numDims; dim++) {
            final List<LabelAndValue> labelValues = new ArrayList<>();
            int total = 0;
            for (Map.Entry<String, Integer> entry : expectedCounts[dim].entrySet()) {
                final Integer count = entry.getValue();
                labelValues.add(new LabelAndValue(entry.getKey(), count));
                total += count;
            }
            sortLabelValues(labelValues);
            if (total <= 0) {
                continue;
            }
            final LabelAndValue[] children = labelValues.toArray(new LabelAndValue[0]);
            expected.add(new FacetResult("dim" + dim, new String[0], total, children, labelValues.size()));
        }

        // Sort by highest value, tie break by value:
        sortFacetResults(expected);

        final List<FacetResult> actual = facets.getAllDims(10);
        assertEquals(expected, actual);
    }

    if (exec != null) {
        exec.shutdownNow();
    }
    indexWriter.close();
    IOUtils.close(searcher.getIndexReader(), indexDir, taxoDir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Facets(org.apache.lucene.facet.Facets) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) LabelAndValue(org.apache.lucene.facet.LabelAndValue) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) FacetsConfig(org.apache.lucene.facet.FacetsConfig) Term(org.apache.lucene.index.Term) FacetsCollector(org.apache.lucene.facet.FacetsCollector) ExecutorService(java.util.concurrent.ExecutorService) FacetResult(org.apache.lucene.facet.FacetResult) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 10 with Facets

use of org.apache.lucene.facet.Facets in project lucene-solr by apache.

the class TestSortedSetDocValuesFacets method testSparseFacets.

// LUCENE-5333
/**
 * Indexes three documents where dimension "a" appears in all of them, "b" in two and "c"
 * in one (with random commits in between), then checks the result of {@code getAllDims(10)}
 * and the resource accounting reported by the reader state.
 *
 * @throws Exception if indexing, searching or closing any resource fails
 */
public void testSparseFacets() throws Exception {
    final Directory directory = newDirectory();
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    final FacetsConfig facetsConfig = new FacetsConfig();

    // Doc 1: only dimension "a".
    final Document firstDoc = new Document();
    firstDoc.add(new SortedSetDocValuesFacetField("a", "foo1"));
    indexWriter.addDocument(facetsConfig.build(firstDoc));
    if (random().nextBoolean()) {
        indexWriter.commit();
    }

    // Doc 2: dimensions "a" and "b".
    final Document secondDoc = new Document();
    secondDoc.add(new SortedSetDocValuesFacetField("a", "foo2"));
    secondDoc.add(new SortedSetDocValuesFacetField("b", "bar1"));
    indexWriter.addDocument(facetsConfig.build(secondDoc));
    if (random().nextBoolean()) {
        indexWriter.commit();
    }

    // Doc 3: dimensions "a", "b" and "c".
    final Document thirdDoc = new Document();
    thirdDoc.add(new SortedSetDocValuesFacetField("a", "foo3"));
    thirdDoc.add(new SortedSetDocValuesFacetField("b", "bar2"));
    thirdDoc.add(new SortedSetDocValuesFacetField("c", "baz1"));
    indexWriter.addDocument(facetsConfig.build(thirdDoc));

    // NRT open
    final IndexSearcher searcher = newSearcher(indexWriter.getReader());
    indexWriter.close();

    // Per-top-reader state:
    final SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
    final ExecutorService exec = randomExecutorServiceOrNull();
    final Facets facets = getAllFacets(searcher, state, exec);

    // Ask for top 10 labels for any dims that have counts:
    final List<FacetResult> results = facets.getAllDims(10);
    final String[] expectedDims = {
        "dim=a path=[] value=3 childCount=3\n  foo1 (1)\n  foo2 (1)\n  foo3 (1)\n",
        "dim=b path=[] value=2 childCount=2\n  bar1 (1)\n  bar2 (1)\n",
        "dim=c path=[] value=1 childCount=1\n  baz1 (1)\n"
    };
    assertEquals(expectedDims.length, results.size());
    for (int i = 0; i < expectedDims.length; i++) {
        assertEquals(expectedDims[i], results.get(i).toString());
    }

    final Collection<Accountable> resources = state.getChildResources();
    assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
    final int leafCount = searcher.getIndexReader().leaves().size();
    if (leafCount <= 1) {
        // Single-leaf reader: the state is expected to report no RAM usage and no children.
        assertEquals(0, state.ramBytesUsed());
        assertTrue(resources.isEmpty());
    } else {
        // Multi-leaf reader: the state is expected to account for RAM under the default index field.
        assertTrue(state.ramBytesUsed() > 0);
        assertFalse(resources.isEmpty());
        assertTrue(resources.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
    }

    if (exec != null) {
        exec.shutdownNow();
    }
    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FacetsConfig(org.apache.lucene.facet.FacetsConfig) Facets(org.apache.lucene.facet.Facets) Accountable(org.apache.lucene.util.Accountable) Document(org.apache.lucene.document.Document) ExecutorService(java.util.concurrent.ExecutorService) FacetResult(org.apache.lucene.facet.FacetResult) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

Facets (org.apache.lucene.facet.Facets): 72; FacetsCollector (org.apache.lucene.facet.FacetsCollector): 60; FacetResult (org.apache.lucene.facet.FacetResult): 47; IndexSearcher (org.apache.lucene.search.IndexSearcher): 46; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 37; DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader): 35; Directory (org.apache.lucene.store.Directory): 34; Document (org.apache.lucene.document.Document): 32; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 32; FacetsConfig (org.apache.lucene.facet.FacetsConfig): 29; DirectoryReader (org.apache.lucene.index.DirectoryReader): 27; LabelAndValue (org.apache.lucene.facet.LabelAndValue): 23; DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter): 22; FacetField (org.apache.lucene.facet.FacetField): 19; ArrayList (java.util.ArrayList): 18; IOException (java.io.IOException): 14; DefaultSortedSetDocValuesReaderState (org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState): 14; SortedSetDocValuesFacetCounts (org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts): 14; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 13; DrillDownQuery (org.apache.lucene.facet.DrillDownQuery): 13