Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by Apache.
Class TestTaxonomyFacetCounts, method testRandom.
public void testRandom() throws Exception {
  String[] tokens = getRandomTokens(10);
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), indexDir);
  DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  int numDocs = atLeast(1000);
  int numDims = TestUtil.nextInt(random(), 1, 7);
  List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);
  for (TestDoc testDoc : testDocs) {
    Document doc = new Document();
    doc.add(newStringField("content", testDoc.content, Field.Store.NO));
    for (int j = 0; j < numDims; j++) {
      if (testDoc.dims[j] != null) {
        doc.add(new FacetField("dim" + j, testDoc.dims[j]));
      }
    }
    w.addDocument(config.build(tw, doc));
  }
  // NRT open
  IndexSearcher searcher = newSearcher(w.getReader());
  // NRT open
  TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
  int iters = atLeast(100);
  for (int iter = 0; iter < iters; iter++) {
    String searchToken = tokens[random().nextInt(tokens.length)];
    if (VERBOSE) {
      System.out.println("\nTEST: iter content=" + searchToken);
    }
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
    Facets facets = getTaxonomyFacetCounts(tr, config, fc);
    // Slow, yet hopefully bug-free, faceting:
    @SuppressWarnings({"rawtypes", "unchecked"})
    Map<String, Integer>[] expectedCounts = new HashMap[numDims];
    for (int i = 0; i < numDims; i++) {
      expectedCounts[i] = new HashMap<>();
    }
    for (TestDoc doc : testDocs) {
      if (doc.content.equals(searchToken)) {
        for (int j = 0; j < numDims; j++) {
          if (doc.dims[j] != null) {
            Integer v = expectedCounts[j].get(doc.dims[j]);
            if (v == null) {
              expectedCounts[j].put(doc.dims[j], 1);
            } else {
              expectedCounts[j].put(doc.dims[j], v.intValue() + 1);
            }
          }
        }
      }
    }
    List<FacetResult> expected = new ArrayList<>();
    for (int i = 0; i < numDims; i++) {
      List<LabelAndValue> labelValues = new ArrayList<>();
      int totCount = 0;
      for (Map.Entry<String, Integer> ent : expectedCounts[i].entrySet()) {
        labelValues.add(new LabelAndValue(ent.getKey(), ent.getValue()));
        totCount += ent.getValue();
      }
      sortLabelValues(labelValues);
      if (totCount > 0) {
        expected.add(new FacetResult("dim" + i, new String[0], totCount,
            labelValues.toArray(new LabelAndValue[labelValues.size()]), labelValues.size()));
      }
    }
    // Sort by highest value; ties are fixed up below:
    sortFacetResults(expected);
    List<FacetResult> actual = facets.getAllDims(10);
    // Messy: fixup ties
    sortTies(actual);
    assertEquals(expected, actual);
  }
  w.close();
  IOUtils.close(tw, searcher.getIndexReader(), tr, indexDir, taxoDir);
}
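Stripped of the randomization and the brute-force verification, the workflow this test exercises is the standard taxonomy faceting loop: build documents through FacetsConfig so facet fields land in both the main index and the taxonomy, then search with a FacetsCollector and count against a TaxonomyReader. Below is a minimal sketch of that loop; the /tmp paths, analyzer choice, and the "dim0"/"label" values are illustrative, and FastTaxonomyFacetCounts stands in for the test harness's getTaxonomyFacetCounts helper.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class FacetCountSketch {
  public static void main(String[] args) throws Exception {
    Directory indexDir = FSDirectory.open(Paths.get("/tmp/facet-index")); // illustrative paths
    Directory taxoDir = FSDirectory.open(Paths.get("/tmp/facet-taxo"));
    FacetsConfig config = new FacetsConfig();

    IndexWriter w = new IndexWriter(indexDir, new IndexWriterConfig(new StandardAnalyzer()));
    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
    Document doc = new Document();
    doc.add(new StringField("content", "token", Field.Store.NO));
    doc.add(new FacetField("dim0", "label"));
    // config.build() rewrites the FacetField into indexable fields and
    // registers the category path with the taxonomy writer:
    w.addDocument(config.build(tw, doc));
    w.close();
    tw.close();

    DirectoryReader r = DirectoryReader.open(indexDir);
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(new IndexSearcher(r), new TermQuery(new Term("content", "token")), 10, fc);
    Facets facets = new FastTaxonomyFacetCounts(tr, config, fc);
    System.out.println(facets.getTopChildren(10, "dim0"));
    tr.close();
    r.close();
    indexDir.close();
    taxoDir.close();
  }
}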
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by Apache.
Class TestTaxonomyCombined, method testChildrenArrays.
/**
 * Test TaxonomyReader's child browsing via getParallelTaxonomyArrays().
 * This only tests the data for correctness on one example; further tests
 * below cover data refresh etc.
 */
@Test
public void testChildrenArrays() throws Exception {
  Directory indexDir = newDirectory();
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  fillTaxonomy(tw);
  tw.close();
  TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
  ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
  int[] youngestChildArray = ca.children();
  assertEquals(tr.getSize(), youngestChildArray.length);
  int[] olderSiblingArray = ca.siblings();
  assertEquals(tr.getSize(), olderSiblingArray.length);
  for (int i = 0; i < expectedCategories.length; i++) {
    // Find the expected children by scanning all of expectedCategories:
    ArrayList<Integer> expectedChildren = new ArrayList<>();
    for (int j = expectedCategories.length - 1; j >= 0; j--) {
      if (expectedCategories[j].length != expectedCategories[i].length + 1) {
        // not longer by 1, so can't be a child
        continue;
      }
      boolean ischild = true;
      for (int k = 0; k < expectedCategories[i].length; k++) {
        if (!expectedCategories[j][k].equals(expectedCategories[i][k])) {
          ischild = false;
          break;
        }
      }
      if (ischild) {
        expectedChildren.add(j);
      }
    }
    // Check the children appear in the correct reverse (youngest to oldest) order:
    if (expectedChildren.size() == 0) {
      assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
    } else {
      int child = youngestChildArray[i];
      assertEquals(expectedChildren.get(0).intValue(), child);
      for (int j = 1; j < expectedChildren.size(); j++) {
        child = olderSiblingArray[child];
        assertEquals(expectedChildren.get(j).intValue(), child);
        // if child is INVALID_ORDINAL we should stop, but
        // assertEquals would fail in this case anyway.
      }
      // When we're done comparing, olderSiblingArray should now point
      // to INVALID_ORDINAL, saying there are no more children. If it
      // doesn't, we found too many children...
      assertEquals(TaxonomyReader.INVALID_ORDINAL, olderSiblingArray[child]);
    }
  }
  tr.close();
  indexDir.close();
}
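The else-branch above is the canonical traversal of the parallel arrays: children() maps each ordinal to its youngest (most recently added) child, siblings() maps each ordinal to its next-older sibling, and INVALID_ORDINAL terminates the chain. A minimal sketch of that traversal as a standalone helper (the childrenOf name is mine; the usual java.util and org.apache.lucene.facet.taxonomy imports are assumed):

// Sketch: collect the children of an ordinal, youngest first, by walking
// the children()/siblings() parallel arrays until INVALID_ORDINAL.
static List<Integer> childrenOf(TaxonomyReader tr, int ordinal) throws IOException {
  ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
  int[] children = ca.children();  // ordinal -> youngest child, or INVALID_ORDINAL
  int[] siblings = ca.siblings();  // ordinal -> next-older sibling, or INVALID_ORDINAL
  List<Integer> result = new ArrayList<>();
  for (int c = children[ordinal]; c != TaxonomyReader.INVALID_ORDINAL; c = siblings[c]) {
    result.add(c);
  }
  return result;
}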
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by Apache.
Class TestTaxonomyCombined, method testWriterTwice.
/**
 * testWriterTwice is exactly like testWriter, except that after adding
 * all the categories, we add them again, and see that we get the same
 * old ids again - not new categories.
 */
@Test
public void testWriterTwice() throws Exception {
  Directory indexDir = newDirectory();
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  fillTaxonomy(tw);
  // Run fillTaxonomy again - this will try to add the same categories
  // again, and check that we see the same ordinal paths again, not
  // different ones.
  fillTaxonomy(tw);
  // Check the number of categories again, to see that no extraneous
  // categories were created:
  assertEquals(expectedCategories.length, tw.getSize());
  tw.close();
  indexDir.close();
}
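The property this test depends on is that TaxonomyWriter.addCategory is idempotent: adding a path that already exists returns the existing ordinal rather than creating a duplicate. A minimal sketch, reusing the harness's newDirectory() helper and an illustrative "author" path:

Directory dir = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir);
// Adding the same path twice yields the same ordinal, not a new category:
int first = tw.addCategory(new FacetLabel("author", "Mark Twain"));
int second = tw.addCategory(new FacetLabel("author", "Mark Twain"));
assertEquals(first, second);
tw.close();
dir.close();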
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by Apache.
Class TestTaxonomyCombined, method testWriterCheckPaths2.
/**
 * testWriterCheckPaths2 is the path-checking variant of testWriterTwice
 * and testWriterTwice2. After adding all the categories, we add them again,
 * and see that we get the same old ids and paths. We repeat the path checking
 * yet again after closing and opening the index for writing again - to see
 * that the reading of existing data from disk works as well.
 */
@Test
public void testWriterCheckPaths2() throws Exception {
  Directory indexDir = newDirectory();
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
  fillTaxonomy(tw);
  checkPaths(tw);
  fillTaxonomy(tw);
  checkPaths(tw);
  tw.close();
  tw = new DirectoryTaxonomyWriter(indexDir);
  checkPaths(tw);
  fillTaxonomy(tw);
  checkPaths(tw);
  tw.close();
  indexDir.close();
}
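The reopen in the middle of the test is the interesting part: constructing a DirectoryTaxonomyWriter over a directory that already holds a taxonomy reads the persisted data back, so paths added in an earlier session keep their ordinals. A minimal sketch of just that property, again using the harness's newDirectory() helper:

Directory dir = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir);
int ord = tw.addCategory(new FacetLabel("a", "b"));
tw.close();
// Reopen over the same directory: the persisted taxonomy is reloaded,
// so the same path resolves to its old ordinal, not a new one.
tw = new DirectoryTaxonomyWriter(dir);
assertEquals(ord, tw.addCategory(new FacetLabel("a", "b")));
tw.close();
dir.close();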
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by Apache.
Class TestTaxonomyCombined, method testTaxonomyReaderRefreshRaces.
// Test that the parallel taxonomy arrays are valid when retrieved during refresh
@Test
public void testTaxonomyReaderRefreshRaces() throws Exception {
  // compute base child arrays - after first chunk, and after the other
  Directory indexDirBase = newDirectory();
  TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
  twBase.addCategory(new FacetLabel("a", "0"));
  final FacetLabel abPath = new FacetLabel("a", "b");
  twBase.addCategory(abPath);
  twBase.commit();
  TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
  final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
  final int abOrd = trBase.getOrdinal(abPath);
  final int abYoungChildBase1 = ca1.children()[abOrd];
  final int numCategories = atLeast(800);
  for (int i = 0; i < numCategories; i++) {
    twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i)));
  }
  twBase.close();
  TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase);
  assertNotNull(newTaxoReader);
  trBase.close();
  trBase = newTaxoReader;
  final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
  final int abYoungChildBase2 = ca2.children()[abOrd];
  int numRetries = atLeast(50);
  for (int retry = 0; retry < numRetries; retry++) {
    assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
  }
  trBase.close();
  indexDirBase.close();
}
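assertConsistentYoungestChild (not shown here) re-reads the child arrays while a refresh is in flight; the refresh mechanism itself is TaxonomyReader.openIfChanged, which returns null when the taxonomy is unchanged and a replacement reader otherwise. A minimal sketch of that idiom (the taxoReader variable is mine):

// Standard refresh idiom: openIfChanged returns null if nothing changed;
// otherwise switch to the new reader and close the old one.
TaxonomyReader newReader = TaxonomyReader.openIfChanged(taxoReader);
if (newReader != null) {
  taxoReader.close();
  taxoReader = newReader;
  // Any previously fetched ParallelTaxonomyArrays belong to the old
  // reader; re-fetch them from the new one before further traversal.
}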