Use of org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache in project lucene-solr by Apache.
Class TestDirectoryTaxonomyWriter, method testConcurrency.
/**
 * Adds randomly generated four-component categories to a single {@link DirectoryTaxonomyWriter}
 * from several threads at once, then opens the taxonomy with a {@link DirectoryTaxonomyReader}
 * and checks that every path recorded by the threads is present and has the expected parent.
 *
 * @throws Exception if the taxonomy cannot be written or read, or a worker thread is interrupted
 */
public void testConcurrency() throws Exception {
  // add many categories
  final int ncats = atLeast(100000);
  // exclusive upper bound of the random values the categories are built from
  final int range = ncats * 3;
  // countdown shared by all worker threads; they stop once it is used up
  final AtomicInteger remaining = new AtomicInteger(ncats);
  final Directory dir = newDirectory();
  // every path prefix (1 to 4 components) of every category handed to the writer
  final ConcurrentHashMap<String, String> seenPaths = new ConcurrentHashMap<>();

  final double pick = random().nextDouble();
  final TaxonomyWriterCache cache;
  if (pick < 0.7) {
    // this is the fastest, yet most memory consuming
    cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
  } else if (TEST_NIGHTLY && pick > 0.98) {
    // this is the slowest, but tests the writer concurrency when no caching is done.
    // only pick it during NIGHTLY tests, and even then, with very low chances.
    cache = NO_OP_CACHE;
  } else {
    // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
    cache = new LruTaxonomyWriterCache(ncats / 10);
  }
  if (VERBOSE) {
    System.out.println("TEST: use cache=" + cache);
  }

  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
  final Thread[] workers = new Thread[atLeast(4)];
  for (int idx = 0; idx < workers.length; idx++) {
    workers[idx] = new Thread() {
      @Override
      public void run() {
        final Random rnd = random();
        try {
          while (remaining.decrementAndGet() > 0) {
            final int value = rnd.nextInt(range);
            final FacetLabel label = new FacetLabel(
                Integer.toString(value / 1000),
                Integer.toString(value / 10000),
                Integer.toString(value / 100000),
                Integer.toString(value));
            final int ordinal = tw.addCategory(label);
            assertTrue("invalid parent for ordinal " + ordinal + ", category " + label, tw.getParent(ordinal) != -1);
            // remember the category itself and each of its ancestors
            for (int prefixLen = 1; prefixLen <= 4; prefixLen++) {
              final String prefix = FacetsConfig.pathToString(label.components, prefixLen);
              seenPaths.put(prefix, prefix);
            }
          }
        } catch (IOException e) {
          // run() cannot declare checked exceptions
          throw new RuntimeException(e);
        }
      }
    };
  }
  for (Thread worker : workers) {
    worker.start();
  }
  for (Thread worker : workers) {
    worker.join();
  }
  tw.close();

  final DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
  // +1 for root category
  final int expectedSize = seenPaths.size() + 1;
  if (dtr.getSize() != expectedSize) {
    // report each recorded path the reader does not know before failing
    for (String encoded : seenPaths.keySet()) {
      final FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(encoded));
      if (dtr.getOrdinal(label) == -1) {
        System.out.println("FAIL: path=" + label + " not recognized");
      }
    }
    fail("mismatch number of categories");
  }

  final int[] parents = dtr.getParallelTaxonomyArrays().parents();
  for (String encoded : seenPaths.keySet()) {
    final FacetLabel full = new FacetLabel(FacetsConfig.stringToPath(encoded));
    assertTrue("category not found " + full, dtr.getOrdinal(full) > 0);
    // for the first component, parent is always virtual ROOT (ord=0)
    int expectedParent = 0;
    for (int depth = 1; depth <= full.length; depth++) {
      final FacetLabel prefix = full.subpath(depth);
      final int ord = dtr.getOrdinal(prefix);
      assertEquals("invalid parent for cp=" + prefix, expectedParent, parents[ord]);
      // the next, deeper prefix should have this one as its parent
      expectedParent = ord;
    }
  }
  IOUtils.close(dtr, dir);
}
Use of org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache in project lucene-solr by Apache.
Class TestDirectoryTaxonomyWriter, method testHugeLabel.
/**
 * Indexes one document whose facet value is {@code FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4}
 * characters long, using a taxonomy writer backed by a very small
 * {@link Cl2oTaxonomyWriterCache}. After adding three short labels, the test checks that
 * re-adding the huge label yields its original ordinal and that a drill-down on it matches
 * exactly one document.
 *
 * @throws Exception if indexing, taxonomy access or searching fails
 */
@Test
public void testHugeLabel() throws Exception {
  final Directory indexDir = newDirectory();
  final Directory taxoDir = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  final DirectoryTaxonomyWriter taxoWriter =
      new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
  final FacetsConfig config = new FacetsConfig();

  // Add one huge label; the 4 subtracted characters are for the dimension and separator
  final int hugeLen = FacetLabel.MAX_CATEGORY_PATH_LENGTH - 4;
  final String hugeValue = TestUtil.randomSimpleString(random(), hugeLen, hugeLen);
  final FacetField hugeField = new FacetField("dim", hugeValue);
  final FacetLabel hugeLabel = new FacetLabel("dim", hugeValue);
  final int hugeOrdinal = taxoWriter.addCategory(hugeLabel);
  final Document hugeDoc = new Document();
  hugeDoc.add(hugeField);
  indexWriter.addDocument(config.build(taxoWriter, hugeDoc));

  // Add tiny ones to cause a re-hash
  for (int round = 0; round < 3; round++) {
    final String tiny = TestUtil.randomSimpleString(random(), 1, 10);
    taxoWriter.addCategory(new FacetLabel("dim", tiny));
    final Document tinyDoc = new Document();
    tinyDoc.add(new FacetField("dim", tiny));
    indexWriter.addDocument(config.build(taxoWriter, tinyDoc));
  }

  // when too large components were allowed to be added, this resulted in a new added category
  assertEquals(hugeOrdinal, taxoWriter.addCategory(hugeLabel));

  indexWriter.close();
  IOUtils.close(taxoWriter);

  // reopen both indexes and drill down on the huge label
  final DirectoryReader indexReader = DirectoryReader.open(indexDir);
  final TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
  final IndexSearcher searcher = new IndexSearcher(indexReader);
  final DrillDownQuery drillDown = new DrillDownQuery(new FacetsConfig());
  drillDown.add("dim", hugeValue);
  assertEquals(1, searcher.search(drillDown, 10).totalHits);

  IOUtils.close(indexReader, taxoReader, indexDir, taxoDir);
}
Aggregations