Search in sources :

Example 1 with Cl2oTaxonomyWriterCache

Use of org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache in the lucene-solr project by Apache.

In the class TestDirectoryTaxonomyWriter, method testConcurrency:

/**
 * Adds randomly chosen four-component categories to a single
 * {@link DirectoryTaxonomyWriter} from several threads at once, then reopens
 * the taxonomy with a {@link DirectoryTaxonomyReader} and checks that every
 * recorded path is present and that each path prefix has the expected parent.
 *
 * @throws Exception if the writer, reader or directory fails
 */
public void testConcurrency() throws Exception {
    // shared budget of add attempts; the worker threads count it down together
    final int ncats = atLeast(100000);
    // values are drawn from [0, range), which affects the categories selection
    final int range = ncats * 3;
    final AtomicInteger numCats = new AtomicInteger(ncats);
    final Directory dir = newDirectory();
    // every path prefix that any thread added (used as a concurrent set)
    final ConcurrentHashMap<String, String> values = new ConcurrentHashMap<>();
    final double d = random().nextDouble();
    final TaxonomyWriterCache cache;
    if (d >= 0.7) {
        if (TEST_NIGHTLY && d > 0.98) {
            // this is the slowest, but tests the writer concurrency when no caching is done.
            // only pick it during NIGHTLY tests, and even then, with very low chances.
            cache = NO_OP_CACHE;
        } else {
            // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
            cache = new LruTaxonomyWriterCache(ncats / 10);
        }
    } else {
        // this is the fastest, yet most memory consuming
        cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
    }
    if (VERBOSE) {
        System.out.println("TEST: use cache=" + cache);
    }
    final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
    Thread[] addThreads = new Thread[atLeast(4)];
    for (int z = 0; z < addThreads.length; z++) {
        addThreads[z] = new Thread() {

            @Override
            public void run() {
                Random rnd = random();
                try {
                    while (numCats.decrementAndGet() > 0) {
                        int value = rnd.nextInt(range);
                        FacetLabel cp = new FacetLabel(
                            Integer.toString(value / 1000),
                            Integer.toString(value / 10000),
                            Integer.toString(value / 100000),
                            Integer.toString(value));
                        int ord = tw.addCategory(cp);
                        assertTrue("invalid parent for ordinal " + ord + ", category " + cp, tw.getParent(ord) != -1);
                        // record the path at each of its four prefix depths
                        for (int depth = 1; depth <= 4; depth++) {
                            String prefix = FacetsConfig.pathToString(cp.components, depth);
                            values.put(prefix, prefix);
                        }
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        };
    }
    for (Thread t : addThreads) {
        t.start();
    }
    for (Thread t : addThreads) {
        t.join();
    }
    tw.close();

    DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
    // +1 for root category
    int expectedSize = values.size() + 1;
    if (dtr.getSize() != expectedSize) {
        // report which recorded paths the reader does not know before failing
        for (String recorded : values.keySet()) {
            FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(recorded));
            if (dtr.getOrdinal(label) == -1) {
                System.out.println("FAIL: path=" + label + " not recognized");
            }
        }
        fail("mismatch number of categories");
    }

    int[] parents = dtr.getParallelTaxonomyArrays().parents();
    for (String cat : values.keySet()) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
        assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
        // for root, parent is always virtual ROOT (ord=0)
        int parentOrd = 0;
        for (int prefixLen = 1; prefixLen <= cp.length; prefixLen++) {
            FacetLabel path = cp.subpath(prefixLen);
            int ord = dtr.getOrdinal(path);
            assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
            // next level should have this parent
            parentOrd = ord;
        }
    }
    IOUtils.close(dtr, dir);
}
Also used : LruTaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) IOException(java.io.IOException) Cl2oTaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache) Random(java.util.Random) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache) Cl2oTaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache) LruTaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache) Directory(org.apache.lucene.store.Directory)

Example 2 with Cl2oTaxonomyWriterCache

Use of org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache in the lucene-solr project by Apache.

In the class TestDirectoryTaxonomyWriter, method testHugeLabel:

/**
 * Adds one category whose label is {@code FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4}
 * characters long, followed by three short ones, through a taxonomy writer
 * backed by a small {@link Cl2oTaxonomyWriterCache}. It then checks that
 * re-adding the huge category returns its original ordinal and that a
 * drill-down on the huge label matches exactly one document.
 *
 * @throws Exception if indexing, taxonomy access or searching fails
 */
@Test
public void testHugeLabel() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
    DirectoryTaxonomyWriter taxoWriter =
        new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
    FacetsConfig config = new FacetsConfig();

    // Add one huge label:
    // (the -4 is for the dimension and separator)
    final int len = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4;
    final String bigs = TestUtil.randomSimpleString(random(), len, len);
    FacetField ff = new FacetField("dim", bigs);
    FacetLabel cp = new FacetLabel("dim", bigs);
    final int ordinal = taxoWriter.addCategory(cp);
    Document hugeDoc = new Document();
    hugeDoc.add(ff);
    indexWriter.addDocument(config.build(taxoWriter, hugeDoc));

    // Add tiny ones to cause a re-hash
    for (int i = 0; i < 3; i++) {
        String s = TestUtil.randomSimpleString(random(), 1, 10);
        taxoWriter.addCategory(new FacetLabel("dim", s));
        Document tinyDoc = new Document();
        tinyDoc.add(new FacetField("dim", s));
        indexWriter.addDocument(config.build(taxoWriter, tinyDoc));
    }

    // when too large components were allowed to be added, this resulted in a new added category
    assertEquals(ordinal, taxoWriter.addCategory(cp));

    indexWriter.close();
    IOUtils.close(taxoWriter);

    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    DrillDownQuery ddq = new DrillDownQuery(new FacetsConfig());
    ddq.add("dim", bigs);
    // exactly one document was indexed with the huge label
    assertEquals(1, searcher.search(ddq, 10).totalHits);

    IOUtils.close(indexReader, taxoReader, indexDir, taxoDir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FacetsConfig(org.apache.lucene.facet.FacetsConfig) DirectoryReader(org.apache.lucene.index.DirectoryReader) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) DrillDownQuery(org.apache.lucene.facet.DrillDownQuery) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) Cl2oTaxonomyWriterCache(org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Aggregations

FacetLabel (org.apache.lucene.facet.taxonomy.FacetLabel)2 Cl2oTaxonomyWriterCache (org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache)2 Directory (org.apache.lucene.store.Directory)2 IOException (java.io.IOException)1 Random (java.util.Random)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1 Document (org.apache.lucene.document.Document)1 DrillDownQuery (org.apache.lucene.facet.DrillDownQuery)1 FacetField (org.apache.lucene.facet.FacetField)1 FacetsConfig (org.apache.lucene.facet.FacetsConfig)1 TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)1 LruTaxonomyWriterCache (org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache)1 TaxonomyWriterCache (org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 Test (org.junit.Test)1