Use of org.trie4j.tail.builder.ConcatTailBuilder in project trie4j by takawitter.
From the class TestWikipedia, method main.
/**
 * Builds a tail Patricia trie from {@code WikipediaTitles}, converts it into a
 * {@code DoubleArray} trie, and dumps the converted trie to standard output.
 *
 * <p>Insertion stops early once the number of inserted words equals {@code maxCount},
 * a value declared outside this method. Only the time spent inside
 * {@code trie.insert(word)} is accumulated, because the timer is reset immediately
 * before each insertion.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if building, converting, or dumping the trie fails
 */
public static void main(String[] args) throws Exception {
    // Alternative first-stage trie implementations, kept for manual experiments:
    // Trie trie = new org.trie4j.patricia.simple.PatriciaTrie();
    // Trie trie = new org.trie4j.patricia.multilayer.MultilayerPatriciaTrie();
    Trie trie = new org.trie4j.patricia.TailPatriciaTrie(new ConcatTailBuilder());
    LapTimer t = new LapTimer();
    {
        System.out.println("-- building first trie: " + trie.getClass().getName());
        int c = 0;          // number of words inserted
        int charCount = 0;  // total length of all inserted words
        long sum = 0;       // accumulated timer readings for insert() calls only
        for (String word : new WikipediaTitles()) {
            // Reset right before the insert so iteration cost is excluded.
            t.reset();
            trie.insert(word);
            sum += t.lapMillis();
            charCount += word.length();
            c++;
            if (c == maxCount) {
                break;
            }
        }
        // NOTE(review): the value comes from lapMillis() yet is divided by 1,000,000 as if it
        // were nanoseconds - confirm the unit returned by LapTimer before trusting this figure.
        System.out.println(String.format("-- done in %d millis with %d entries, %d chars", sum / 1000000, c, charCount));
    }
    {
        System.out.println("-- building second trie.");
        t.reset();
        trie = new org.trie4j.doublearray.DoubleArray(trie, 65536);
        // Alternative second-stage implementations, kept for manual experiments:
        // trie = new org.trie4j.doublearray.TailDoubleArray(trie, 65536, new ConcatTailBuilder());
        // trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new ConcatTailBuilder());
        // trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new SuffixTrieTailBuilder());
        trie.trimToSize();
        // NOTE(review): same millis-vs-nanos question as above.
        System.out.println(String.format("-- done in %d millis.", t.lapMillis() / 1000000));
        System.gc();
        System.gc();
        // The message is still printed, but the actual sleep is disabled, so no wait happens.
        System.out.println("waiting 10 seconds.");
        // Thread.sleep(10000);
    }
    System.out.println("-- dump trie.");
    // PrintWriter(OutputStream) does not auto-flush, so flush explicitly; otherwise the
    // dump may remain in the writer's buffer. Do not close it: that would close System.out.
    PrintWriter out = new PrintWriter(System.out);
    trie.dump(out);
    out.flush();
    return;
    /*
    System.out.println("-- traversing trie.");
    final AtomicInteger cnt = new AtomicInteger();
    trie.traverse(new NodeVisitor() {
    @Override
    public boolean visit(Node node, int nest) {
    if(node instanceof InternalCharsNode){
    if(((InternalCharsNode)node).getChildren().length == 1){
    cnt.incrementAndGet();
    }
    }
    return true;
    }
    });
    System.out.println(cnt + " nodes have 1 child.");
    // investigate(trie, charCount);
    //*
    // dump(trie);
    System.out.println("-- pack");
    t.lap();
    if(trie instanceof MultilayerPatriciaTrie){
    MultilayerPatriciaTrie mt = (MultilayerPatriciaTrie)trie;
    mt.pack();
    System.out.println("-- pack done in " + (t.lap() / 1000000) + " millis.");
    // dump(trie);
    System.gc();
    Thread.sleep(1000);
    System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
    investigate(mt);
    }
    //*/
}
Use of org.trie4j.tail.builder.ConcatTailBuilder in project trie4j by takawitter.
From the class SBVConcatTailArrayTest, method test.
/**
 * Checks whether the bit vector and its caches differ between a trie built with
 * {@code SBVConcatTailArrayBuilder} and one built with
 * {@code SBVConcatTailArrayAppendingBuilder}, both derived from the same source trie.
 *
 * <p>The bit vectors must have equal size and equal bits. For the count cache and the
 * index cache only the common prefix (the shorter of the two lengths) is compared,
 * because the length-equality assertions are currently disabled.
 *
 * @throws Exception if building or querying any of the tries fails
 */
@Test
public void test() throws Exception {
    // Investigate whether the bit vector or the caches differ between using the regular
    // SBV concat tail-array builder and using a builder that appends on every add.
    TailPatriciaTrie org = new TailPatriciaTrie(new ConcatTailBuilder());
    new WikipediaTitles().insertTo(org);

    TailLOUDSTrie louds1 = new TailLOUDSTrie(org, new SBVConcatTailArrayAppendingBuilder());
    new WikipediaTitles().assertAllContains(louds1);
    BytesSuccinctBitVector sbv1 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds1.getTailArray()).getTailIndex()).getSbv();

    TailLOUDSTrie louds2 = new TailLOUDSTrie(org, new SBVConcatTailArrayBuilder());
    new WikipediaTitles().assertAllContains(louds2);
    BytesSuccinctBitVector sbv2 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds2.getTailArray()).getTailIndex()).getSbv();

    // 1) The bit vectors themselves: same size, same bit at every position.
    {
        int n = sbv1.size();
        System.out.println("sbv size: " + n);
        Assert.assertEquals(n, sbv2.size());
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th bit", sbv1.get(i), sbv2.get(i));
        }
    }

    // 2) The count caches: compare the common prefix only.
    {
        int[] countCache1 = sbv1.getCountCache0();
        int[] countCache2 = sbv2.getCountCache0();
        int n = countCache1.length;
        System.out.println("countCache0 size should be: " + (sbv1.size() / 64 + 1));
        System.out.println("countCache0 size: " + n);
        // Length equality is intentionally not asserted at the moment:
        // Assert.assertEquals(n, countCache2.length);
        n = Math.min(countCache1.length, countCache2.length);
        for (int i = 0; i < n; i++) {
            // Label the failure as a count-cache mismatch (was mislabelled "index cache").
            Assert.assertEquals(i + "th count cache.", countCache1[i], countCache2[i]);
        }
    }

    // 3) The index caches: print a short preview of each, then compare the common prefix.
    {
        IntArray indexCache1 = sbv1.getIndexCache0();
        IntArray indexCache2 = sbv2.getIndexCache0();
        int n = indexCache1.size();
        System.out.println("indexCache0 size1: " + n);
        System.out.println("indexCache0 size2: " + indexCache2.size());
        // Length equality is intentionally not asserted at the moment:
        // Assert.assertEquals(n, indexCache2.size());
        n = Math.min(indexCache1.size(), indexCache2.size());

        // Preview at most the first 10 entries, never reading past either cache's size.
        int preview1 = Math.min(10, indexCache1.size());
        for (int i = 0; i < preview1; i++) {
            System.out.print(indexCache1.get(i) + ", ");
        }
        System.out.println();
        int preview2 = Math.min(10, indexCache2.size());
        for (int i = 0; i < preview2; i++) {
            System.out.print(indexCache2.get(i) + ", ");
        }
        System.out.println();

        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th index cache.", indexCache1.get(i), indexCache2.get(i));
        }
    }
}
Aggregations