Search in sources :

Example 11 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestDirectoryTaxonomyWriter method testReplaceTaxonomy.

/**
 * Checks {@code DirectoryTaxonomyWriter.replaceTaxonomy}: after replacing a taxonomy that
 * holds "b" and "c" with an input taxonomy that holds only "a", the writer must report
 * root + "a" only, keep the ordinal "a" had in the input, hand out a fresh ordinal for "b",
 * and report a larger epoch (as read by {@code getEpoch}) once closed.
 */
@Test
public void testReplaceTaxonomy() throws Exception {
    // Build the input taxonomy that will later replace the destination one.
    Directory input = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
    int ordA = taxoWriter.addCategory(new FacetLabel("a"));
    taxoWriter.close();

    // Build the destination taxonomy with two unrelated categories and commit it.
    Directory dir = newDirectory();
    taxoWriter = new DirectoryTaxonomyWriter(dir);
    int ordB = taxoWriter.addCategory(new FacetLabel("b"));
    taxoWriter.addCategory(new FacetLabel("c"));
    taxoWriter.commit();
    long origEpoch = getEpoch(dir);

    // replace the taxonomy with the input one
    taxoWriter.replaceTaxonomy(input);
    // LUCENE-4633: make sure that category "a" is not added again in any case
    taxoWriter.addTaxonomy(input, new MemoryOrdinalMap());
    // root + 'a'
    assertEquals("no categories should have been added", 2, taxoWriter.getSize());
    assertEquals("category 'a' received new ordinal?", ordA, taxoWriter.addCategory(new FacetLabel("a")));

    // add "b" again -- it must not get back the ordinal it had before the replace
    int newOrdB = taxoWriter.addCategory(new FacetLabel("b"));
    // Compare the primitive values directly: assertNotSame would box both ints and compare
    // object identity, which is not a reliable value comparison.
    assertTrue("new ordinal cannot be the original ordinal", ordB != newOrdB);
    assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdB);
    taxoWriter.close();

    long newEpoch = getEpoch(dir);
    assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
    dir.close();
    input.close();
}
Also used : MemoryOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Example 12 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestCompactLabelToOrdinal method testL2O.

/**
 * Randomized comparison of {@code CompactLabelToOrdinal} against a {@code LabelToOrdinalMap}:
 * both are fed the same labels and must always answer {@code getOrdinal} identically, also
 * after the compact map has been flushed to a file and re-opened from it.
 */
@Test
public void testL2O() throws Exception {
    LabelToOrdinal expected = new LabelToOrdinalMap();
    CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);
    final int n = atLeast(10 * 1000);
    final int numUniqueValues = 50 * 1000;
    String[] uniqueValues = new String[numUniqueValues];
    byte[] buffer = new byte[50];
    Random random = random();

    // Fill uniqueValues with strings decoded from random bytes; a slot is only considered
    // filled once its string is free of the terminator char.
    int filled = 0;
    while (filled < numUniqueValues) {
        random.nextBytes(buffer);
        int size = 1 + random.nextInt(buffer.length);
        // Random bytes are rarely valid UTF-8, so malformed/unmappable input is replaced
        // instead of raising an error.
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .onMalformedInput(CodingErrorAction.REPLACE);
        String candidate = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
        // Path components may not be empty: collapse runs of delimiters and drop a leading one.
        candidate = candidate.replaceAll("/+", "/");
        if (candidate.startsWith("/")) {
            candidate = candidate.substring(1);
        }
        uniqueValues[filled] = candidate;
        boolean hasTerminator = candidate.indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) != -1;
        if (!hasTerminator) {
            filled++;
        }
    }

    Path tmpDir = createTempDir("testLableToOrdinal");
    Path f = tmpDir.resolve("CompactLabelToOrdinalTest.tmp");
    int flushInterval = 10;

    for (int iter = 0; iter < n; iter++) {
        boolean timeToFlush = iter > 0 && iter % flushInterval == 0;
        if (timeToFlush) {
            // Round-trip the compact map through the file, then grow the interval tenfold
            // while it is still below n / 10.
            compact.flush(f);
            compact = CompactLabelToOrdinal.open(f, 0.15f, 3);
            Files.delete(f);
            if (flushInterval < (n / 10)) {
                flushInterval *= 10;
            }
        }

        String s = uniqueValues[random.nextInt(numUniqueValues)];
        FacetLabel label = s.length() == 0 ? new FacetLabel() : new FacetLabel(s.split("/"));

        int expectedOrd = expected.getOrdinal(label);
        int compactOrd = compact.getOrdinal(label);
        assertEquals(expectedOrd, compactOrd);

        if (expectedOrd == LabelToOrdinal.INVALID_ORDINAL) {
            // Unknown to both maps: register it in both under the same new ordinal.
            int freshOrd = compact.getNextOrdinal();
            expected.addLabel(label, freshOrd);
            compact.addLabel(label, freshOrd);
        }
    }

    // Final sweep: every generated value, added or not, must resolve identically in both maps.
    for (String s : uniqueValues) {
        FacetLabel label = s.length() == 0 ? new FacetLabel() : new FacetLabel(s.split("/"));
        assertEquals(expected.getOrdinal(label), compact.getOrdinal(label));
    }
}
Also used : Path(java.nio.file.Path) CharsetDecoder(java.nio.charset.CharsetDecoder) Random(java.util.Random) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) Test(org.junit.Test)

Example 13 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestAddTaxonomy method testAddToEmpty.

/**
 * Adds a two-category source taxonomy to a freshly created destination taxonomy writer and
 * hands both directories plus the ordinal map to {@code validate}.
 */
public void testAddToEmpty() throws Exception {
    Directory dest = newDirectory();
    Directory src = newDirectory();

    // Populate the source taxonomy.
    DirectoryTaxonomyWriter sourceWriter = new DirectoryTaxonomyWriter(src);
    FacetLabel author = new FacetLabel("Author", "Rob Pike");
    sourceWriter.addCategory(author);
    FacetLabel aardvark = new FacetLabel("Aardvarks", "Bob");
    sourceWriter.addCategory(aardvark);
    sourceWriter.close();

    // Merge the source into the destination writer, which has had no categories added by this test.
    DirectoryTaxonomyWriter destWriter = new DirectoryTaxonomyWriter(dest);
    OrdinalMap ordinalMap = randomOrdinalMap();
    destWriter.addTaxonomy(src, ordinalMap);
    destWriter.close();

    validate(dest, src, ordinalMap);

    IOUtils.close(dest, src);
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) DiskOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap) MemoryOrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap) OrdinalMap(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap) Directory(org.apache.lucene.store.Directory)

Example 14 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestDirectoryTaxonomyReader method testOpenIfChangedManySegments.

/**
 * Exercises {@code TaxonomyReader.openIfChanged} over several rounds of additions made by a
 * writer whose merge policy uses a merge factor of 2. After each round the refreshed reader
 * must report the expected size and the expected parent for every category of that round.
 */
@Test
public void testOpenIfChangedManySegments() throws Exception {
    Directory dir = newDirectory();
    // Writer with a merge factor of 2 -- per the test's intent, so that the taxonomy
    // ends up containing many segments.
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {

        @Override
        protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
            IndexWriterConfig config = super.createIndexWriterConfig(openMode);
            ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(2);
            return config;
        }
    };
    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);

    final int numRounds = 10 + random().nextInt(10);
    // starts at one, for the root
    int expectedSize = 1;
    for (int round = 0; round < numRounds; round++) {
        final String roundName = Integer.toString(round);
        final int numCats = 1 + random().nextInt(4);
        for (int cat = 0; cat < numCats; cat++) {
            writer.addCategory(new FacetLabel(roundName, Integer.toString(cat)));
        }
        // numCats children plus one for the round category itself
        expectedSize += numCats + 1;

        // There were additions this round, so a refreshed reader must be returned.
        TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
        assertNotNull(refreshed);
        reader.close();
        reader = refreshed;

        // assert categories
        assertEquals(expectedSize, reader.getSize());
        int roundOrdinal = reader.getOrdinal(new FacetLabel(roundName));
        int[] parents = reader.getParallelTaxonomyArrays().parents();
        // the round category hangs directly under the root (ordinal 0)
        assertEquals(0, parents[roundOrdinal]);
        for (int cat = 0; cat < numCats; cat++) {
            int catOrdinal = reader.getOrdinal(new FacetLabel(roundName, Integer.toString(cat)));
            // each category's parent is its round category
            assertEquals(roundOrdinal, parents[catOrdinal]);
        }
    }

    reader.close();
    writer.close();
    dir.close();
}
Also used : TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) LogMergePolicy(org.apache.lucene.index.LogMergePolicy) OpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 15 with FacetLabel

use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by apache.

the class TestDirectoryTaxonomyReader method testOpenIfChangedReplaceTaxonomy.

/**
 * Exercises {@code TaxonomyReader.openIfChanged} after {@code replaceTaxonomy} has been
 * called on the writer, once with a directory-based reader (with commits) and once with a
 * reader opened on the writer. The old reader must keep seeing only "a", and the reopened
 * reader must see only "b".
 */
@Test
public void testOpenIfChangedReplaceTaxonomy() throws Exception {
    // Source taxonomy holding the single category "b"; it replaces the other one below.
    Directory src = newDirectory();
    DirectoryTaxonomyWriter srcWriter = new DirectoryTaxonomyWriter(src);
    FacetLabel labelB = new FacetLabel("b");
    srcWriter.addCategory(labelB);
    srcWriter.close();

    for (boolean nrt : new boolean[] { false, true }) {
        Directory dir = newDirectory();
        DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
        FacetLabel labelA = new FacetLabel("a");
        writer.addCategory(labelA);
        if (!nrt) {
            // The directory-based reader is opened on dir, so commit first.
            writer.commit();
        }

        DirectoryTaxonomyReader before;
        if (nrt) {
            before = new DirectoryTaxonomyReader(writer);
        } else {
            before = new DirectoryTaxonomyReader(dir);
        }
        // fill the first reader's caches
        assertEquals(1, before.getOrdinal(labelA));
        assertEquals(labelA, before.getPath(1));

        // now replace taxonomy
        writer.replaceTaxonomy(src);
        if (!nrt) {
            writer.commit();
        }

        DirectoryTaxonomyReader after = TaxonomyReader.openIfChanged(before);
        assertNotNull(after);
        // fill the reopened reader's caches
        assertEquals(1, after.getOrdinal(labelB));
        assertEquals(labelB, after.getPath(1));

        // the old reader must not see "b"
        assertEquals(TaxonomyReader.INVALID_ORDINAL, before.getOrdinal(labelB));
        assertEquals(labelA, before.getPath(1));

        // the reopened reader must not see "a"
        assertEquals(TaxonomyReader.INVALID_ORDINAL, after.getOrdinal(labelA));
        assertEquals(labelB, after.getPath(1));

        after.close();
        before.close();
        writer.close();
        dir.close();
    }

    src.close();
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Aggregations

FacetLabel (org.apache.lucene.facet.taxonomy.FacetLabel): 43, Directory (org.apache.lucene.store.Directory): 32, Test (org.junit.Test): 25, RAMDirectory (org.apache.lucene.store.RAMDirectory): 13, TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader): 7, MemoryOrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap): 7, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 6, IOException (java.io.IOException): 5, Random (java.util.Random): 5, DiskOrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap): 5, OrdinalMap (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap): 5, HashMap (java.util.HashMap): 4, Map (java.util.Map): 4, DirectoryReader (org.apache.lucene.index.DirectoryReader): 4, IndexWriter (org.apache.lucene.index.IndexWriter): 4, ArrayList (java.util.ArrayList): 3, List (java.util.List): 3, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 3, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 3, Document (org.apache.lucene.document.Document): 3