Use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by Apache.
From the class TestDirectoryTaxonomyWriter, method testReplaceTaxonomy.
/**
 * Checks {@code DirectoryTaxonomyWriter.replaceTaxonomy}: a writer that already holds
 * categories "b" and "c" has its content replaced by an input taxonomy holding only "a".
 *
 * <p>The test then asserts that:
 * <ul>
 *   <li>re-adding the input taxonomy adds no categories (LUCENE-4633),</li>
 *   <li>"a" keeps the ordinal it had in the input taxonomy,</li>
 *   <li>"b", added again after the replacement, gets a different ordinal than before,</li>
 *   <li>the value returned by {@code getEpoch(dir)} grew across the replacement.</li>
 * </ul>
 */
@Test
public void testReplaceTaxonomy() throws Exception {
  // input taxonomy: root + "a"
  Directory input = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
  int ordA = taxoWriter.addCategory(new FacetLabel("a"));
  taxoWriter.close();

  // taxonomy that will be replaced: root + "b" + "c"
  Directory dir = newDirectory();
  taxoWriter = new DirectoryTaxonomyWriter(dir);
  int ordB = taxoWriter.addCategory(new FacetLabel("b"));
  taxoWriter.addCategory(new FacetLabel("c"));
  taxoWriter.commit();

  long origEpoch = getEpoch(dir);

  // replace the taxonomy with the input one
  taxoWriter.replaceTaxonomy(input);

  // LUCENE-4633: make sure that category "a" is not added again in any case
  taxoWriter.addTaxonomy(input, new MemoryOrdinalMap());
  // root + 'a'
  assertEquals("no categories should have been added", 2, taxoWriter.getSize());
  assertEquals("category 'a' received new ordinal?", ordA, taxoWriter.addCategory(new FacetLabel("a")));

  // add "b" again -- it must not receive the ordinal it had before the replacement.
  // Compare the int values directly: assertNotSame would box both ints and compare
  // object identity, which says nothing about whether the ordinals are equal.
  int newOrdB = taxoWriter.addCategory(new FacetLabel("b"));
  assertTrue("new ordinal cannot be the original ordinal", ordB != newOrdB);
  assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdB);

  taxoWriter.close();

  long newEpoch = getEpoch(dir);
  assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);

  dir.close();
  input.close();
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by Apache.
From the class TestCompactLabelToOrdinal, method testL2O.
/**
 * Cross-checks {@code CompactLabelToOrdinal} against {@code LabelToOrdinalMap}: for randomly
 * generated labels both must always return the same ordinal, including after the compact
 * structure has been flushed to a file and re-opened from it.
 */
@Test
public void testL2O() throws Exception {
  LabelToOrdinal map = new LabelToOrdinalMap();
  CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3);

  final int n = atLeast(10 * 1000);
  final int numUniqueValues = 50 * 1000;

  String[] uniqueValues = new String[numUniqueValues];
  byte[] buffer = new byte[50];

  Random random = random();

  // This test is turning random bytes into a string, this is asking for trouble,
  // so malformed/unmappable input is replaced rather than reported.
  // decode(ByteBuffer) resets the decoder before each run, so one instance is reused.
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onUnmappableCharacter(CodingErrorAction.REPLACE)
      .onMalformedInput(CodingErrorAction.REPLACE);

  // i only advances when the generated value is usable, so the array is always filled.
  for (int i = 0; i < numUniqueValues; ) {
    random.nextBytes(buffer);
    int size = 1 + random.nextInt(buffer.length);

    String value = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
    // we cannot have empty path components, so eliminate all prefix as well
    // as middle consecutive delimiter chars.
    value = value.replaceAll("/+", "/");
    if (value.startsWith("/")) {
      value = value.substring(1);
    }
    uniqueValues[i] = value;

    // values containing the terminator char are discarded and regenerated
    if (value.indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
      i++;
    }
  }

  Path tmpDir = createTempDir("testLableToOrdinal");
  Path f = tmpDir.resolve("CompactLabelToOrdinalTest.tmp");
  int flushInterval = 10;

  for (int i = 0; i < n; i++) {
    if (i > 0 && i % flushInterval == 0) {
      // round-trip the compact structure through the file, then continue with the copy
      compact.flush(f);
      compact = CompactLabelToOrdinal.open(f, 0.15f, 3);
      Files.delete(f);
      // flush less and less often as the iteration count grows
      if (flushInterval < (n / 10)) {
        flushInterval *= 10;
      }
    }

    int index = random.nextInt(numUniqueValues);
    FacetLabel label = toFacetLabel(uniqueValues[index]);

    int ord1 = map.getOrdinal(label);
    int ord2 = compact.getOrdinal(label);

    assertEquals(ord1, ord2);

    // unknown label: register it in both structures under the same new ordinal
    if (ord1 == LabelToOrdinal.INVALID_ORDINAL) {
      ord1 = compact.getNextOrdinal();
      map.addLabel(label, ord1);
      compact.addLabel(label, ord1);
    }
  }

  // final pass: both structures must agree on every generated value, added or not
  for (int i = 0; i < numUniqueValues; i++) {
    FacetLabel label = toFacetLabel(uniqueValues[i]);
    int ord1 = map.getOrdinal(label);
    int ord2 = compact.getOrdinal(label);
    assertEquals(ord1, ord2);
  }
}

/** Builds a label from a '/'-delimited path; the empty string maps to the no-component label. */
private static FacetLabel toFacetLabel(String path) {
  return path.isEmpty() ? new FacetLabel() : new FacetLabel(path.split("/"));
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by Apache.
From the class TestAddTaxonomy, method testAddToEmpty.
/**
 * Adds a source taxonomy holding two two-level categories to a freshly created destination
 * taxonomy, then hands both directories and the ordinal map to {@code validate}.
 */
public void testAddToEmpty() throws Exception {
  Directory targetDir = newDirectory();
  Directory sourceDir = newDirectory();

  // build the source taxonomy
  DirectoryTaxonomyWriter sourceWriter = new DirectoryTaxonomyWriter(sourceDir);
  FacetLabel robPike = new FacetLabel("Author", "Rob Pike");
  sourceWriter.addCategory(robPike);
  FacetLabel bob = new FacetLabel("Aardvarks", "Bob");
  sourceWriter.addCategory(bob);
  sourceWriter.close();

  // merge it into a destination writer that has had nothing added to it
  DirectoryTaxonomyWriter targetWriter = new DirectoryTaxonomyWriter(targetDir);
  OrdinalMap ordinalMap = randomOrdinalMap();
  targetWriter.addTaxonomy(sourceDir, ordinalMap);
  targetWriter.close();

  validate(targetDir, sourceDir, ordinalMap);

  IOUtils.close(targetDir, sourceDir);
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by Apache.
From the class TestDirectoryTaxonomyReader, method testOpenIfChangedManySegments.
@Test
/**
 * Exercises {@code TaxonomyReader.openIfChanged} over many rounds of additions, using a writer
 * whose log merge policy is set to a merge factor of 2. After every round the reopened reader
 * must report the expected size and the expected parent for each category added so far in
 * that round.
 */
public void testOpenIfChangedManySegments() throws Exception {
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
      IndexWriterConfig config = super.createIndexWriterConfig(openMode);
      ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(2);
      return config;
    }
  };
  TaxonomyReader current = new DirectoryTaxonomyReader(taxoWriter);

  final int rounds = random().nextInt(10) + 10;
  // starts at one for the root
  int expectedSize = 1;
  for (int round = 0; round < rounds; round++) {
    final int catsInRound = random().nextInt(4) + 1;
    for (int cat = 0; cat < catsInRound; cat++) {
      FacetLabel path = new FacetLabel(Integer.toString(round), Integer.toString(cat));
      taxoWriter.addCategory(path);
    }
    // the extra one is the single-component round category looked up below
    expectedSize += catsInRound + 1;

    // the writer changed, so a new reader must be returned; swap it in
    TaxonomyReader reopened = TaxonomyReader.openIfChanged(current);
    assertNotNull(reopened);
    current.close();
    current = reopened;

    // assert categories
    assertEquals(expectedSize, current.getSize());
    FacetLabel roundLabel = new FacetLabel(Integer.toString(round));
    int roundOrd = current.getOrdinal(roundLabel);
    int[] parentOf = current.getParallelTaxonomyArrays().parents();
    // round's parent is root
    assertEquals(0, parentOf[roundOrd]);
    for (int cat = 0; cat < catsInRound; cat++) {
      FacetLabel path = new FacetLabel(Integer.toString(round), Integer.toString(cat));
      int catOrd = current.getOrdinal(path);
      // category's parent is the round
      assertEquals(roundOrd, parentOf[catOrd]);
    }
  }

  current.close();
  taxoWriter.close();
  dir.close();
}
Use of org.apache.lucene.facet.taxonomy.FacetLabel in project lucene-solr by Apache.
From the class TestDirectoryTaxonomyReader, method testOpenIfChangedReplaceTaxonomy.
@Test
/**
 * Exercises {@code TaxonomyReader.openIfChanged} across a {@code replaceTaxonomy} call, once
 * with a reader opened on the directory (writer committed) and once with a reader opened on
 * the writer (NRT). The old reader must keep seeing only "a" and the reopened reader only "b".
 */
public void testOpenIfChangedReplaceTaxonomy() throws Exception {
  // source taxonomy that will replace the live one: root + "b"
  Directory src = newDirectory();
  DirectoryTaxonomyWriter srcWriter = new DirectoryTaxonomyWriter(src);
  FacetLabel labelB = new FacetLabel("b");
  srcWriter.addCategory(labelB);
  srcWriter.close();

  for (boolean nrt : new boolean[] { false, true }) {
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
    FacetLabel labelA = new FacetLabel("a");
    writer.addCategory(labelA);

    // non-NRT readers are opened on the directory, so the writer is committed first
    final DirectoryTaxonomyReader before;
    if (nrt) {
      before = new DirectoryTaxonomyReader(writer);
    } else {
      writer.commit();
      before = new DirectoryTaxonomyReader(dir);
    }

    // fill the old reader's caches
    assertEquals(1, before.getOrdinal(labelA));
    assertEquals(labelA, before.getPath(1));

    // now replace taxonomy
    writer.replaceTaxonomy(src);
    if (!nrt) {
      writer.commit();
    }

    DirectoryTaxonomyReader after = TaxonomyReader.openIfChanged(before);
    assertNotNull(after);

    // fill the new reader's caches
    assertEquals(1, after.getOrdinal(labelB));
    assertEquals(labelB, after.getPath(1));

    // the old reader must not see "b"
    assertEquals(TaxonomyReader.INVALID_ORDINAL, before.getOrdinal(labelB));
    assertEquals(labelA, before.getPath(1));

    // the new reader must not see "a"
    assertEquals(TaxonomyReader.INVALID_ORDINAL, after.getOrdinal(labelA));
    assertEquals(labelB, after.getPath(1));

    after.close();
    before.close();
    writer.close();
    dir.close();
  }

  src.close();
}
Aggregations