Use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
The class TestWikipedia, method investigate.
/**
 * Prints diagnostic information about {@code trie} to standard output.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>prints the first letter of every child of the root node;</li>
 *   <li>traverses the trie and prints how many nodes report {@code isTerminate()};</li>
 *   <li>traverses again and prints the counts of terminating and non-terminating nodes;</li>
 *   <li>looks up every word supplied by {@code WikipediaTitles} (stopping once
 *       {@code maxCount} words were checked, or at the first word that is not found)
 *       and prints the elapsed times;</li>
 *   <li>for a {@code TailPatriciaTrie}, prints the length of its tails;</li>
 *   <li>starts a thread that sleeps for 100 seconds and then queries the trie once.</li>
 * </ol>
 *
 * @param trie the trie to inspect
 * @throws Exception if any of the called trie or word-source operations fails
 */
@SuppressWarnings("unused")
private static void investigate(Trie trie) throws Exception {
    System.out.println("-- dump root children.");
    for (Node child : trie.getRoot().getChildren()) {
        System.out.print(child.getLetters()[0]);
    }
    System.out.println();

    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
        @Override
        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                count.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println(count.intValue() + " elements.");
    //*
    System.out.println("-- list elements.");
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // NOTE(review): ln and chars are never updated in this method, so the
    // "label node" and "total char count" lines below always report 0.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
        @Override
        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count in trie: " + chars.intValue());

    System.out.println("verifying trie...");
    long lap = System.currentTimeMillis();
    int c = 0;
    // Accumulated as long: the per-lookup deltas are long values, and an int
    // accumulator would silently narrow them on every "+=".
    long sum = 0;
    for (String word : new WikipediaTitles()) {
        if (c == maxCount) {
            break;
        }
        long d = System.currentTimeMillis();
        //trie.contains(word);
        boolean found = Algorithms.contains(trie.getRoot(), word);
        sum += System.currentTimeMillis() - d;
        if (!found) {
            System.out.println("trie not contains [" + word + "]");
            break;
        }
        if (c % 100000 == 0) {
            System.out.println(c + " elements done.");
        }
        c++;
    }
    System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
    System.out.println("contains time: " + sum + " millis.");
    // System.out.println(trie.getRoot().getChildren().length + "children in root");
    if (trie instanceof TailPatriciaTrie) {
        // ((TailPatriciaTrie) trie).pack();
        System.out.println("tail length: " + ((TailPatriciaTrie) trie).getTailBuilder().getTails().length());
    }

    // The anonymous Runnable needs a final reference to the trie.
    // NOTE(review): presumably this thread exists to keep the trie reachable
    // for a while after this method returns (e.g. for memory inspection) - confirm.
    final Trie t = trie;
    new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Do not swallow the interruption: restore the flag so that
                // anything observing this thread can still see it.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
    //*/
}
Use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
The class AbstractSetWikipediaSerializeTest, method test.
/**
 * Round-trips the set under test through Java object serialization.
 *
 * <p>The set returned by {@code set()} is filled via {@code WikipediaTitles.insertTo},
 * written to an in-memory byte array, read back, and then passed to
 * {@code WikipediaTitles.assertAllContains}. A single summary line with the set's
 * simple class name, the serialized size in bytes, and the three measured values is
 * printed to standard output.
 *
 * @throws Exception if populating, serializing, deserializing or verifying fails
 */
@SuppressWarnings("unchecked")
@Test
public void test() throws Exception {
    WikipediaTitles titles = new WikipediaTitles();
    Set<String> original = titles.insertTo(set());

    // Write phase: only writeObject and flush fall between timer creation and the lap.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(buffer);
    LapTimer timer = new LapTimer();
    out.writeObject(original);
    out.flush();
    long writeMillis = timer.lapMillis();

    byte[] bytes = buffer.toByteArray();

    // Read phase: stream construction is inside the measured region, as is readObject.
    timer.reset();
    ByteArrayInputStream source = new ByteArrayInputStream(bytes);
    ObjectInputStream in = new ObjectInputStream(source);
    Set<String> restored = (Set<String>) in.readObject();
    long readMillis = timer.lapMillis();

    // The value returned by assertAllContains is reported as the verify time.
    long verifyMillis = titles.assertAllContains(restored);

    String typeName = original.getClass().getSimpleName();
    String report = String.format(
            "%s%s, size: %d, write(ms): %d, read(ms): %d, verify(ms): %d.",
            typeName, "", bytes.length, writeMillis, readMillis, verifyMillis);
    System.out.println(report);
}
Aggregations