Use of org.trie4j.bytes.PatriciaTrie in project trie4j by takawitter.
Class TestWikipedia, method investigate.
/**
 * Prints statistics about the given trie and then checks that every title
 * read from the gzipped title file is reported as contained in the trie.
 *
 * <p>Steps, in order: count terminating nodes; count terminating vs.
 * non-terminating nodes and sum the label lengths; re-read the title file
 * and call {@code trie.contains} for each line, timing the lookups; finally
 * start a background thread that touches the trie after a long sleep.
 *
 * @param trie      the trie to inspect and verify
 * @param charCount total character count gathered by the caller while
 *                  inserting; only printed here for comparison
 * @throws Exception if reading the title file fails
 */
private static void investigate(PatriciaTrie trie, int charCount) throws Exception {
    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    trie.visit(new TrieVisitor() {
        public void accept(Node node, int nest) {
            if (node.isTerminate())
                count.incrementAndGet();
        }
    });
    System.out.println(count.intValue() + " elements.");
    //*
    System.out.println("-- list elements.");
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // NOTE(review): ln is never incremented, so "label node" always prints 0.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    trie.visit(new TrieVisitor() {
        public void accept(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            chars.addAndGet(node.getLetters().length);
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count: " + charCount);
    System.out.println("total char count in trie: " + chars.intValue());
    System.out.println("verifying trie...");
    // Alternative input: "jawiki-20120220-all-titles-in-ns0.gz"
    BufferedReader r = new BufferedReader(new InputStreamReader(
            new GZIPInputStream(new FileInputStream("enwiki-20120403-all-titles-in-ns0.gz")),
            CharsetUtil.newUTF8Decoder()));
    long lap = System.currentTimeMillis();
    int c = 0;
    // long, so accumulating millisecond deltas needs no narrowing conversion.
    long sum = 0;
    try {
        String word = null;
        while ((word = r.readLine()) != null) {
            // Stop once the same number of entries the caller inserted has been checked.
            if (c == maxCount)
                break;
            long d = System.currentTimeMillis();
            boolean found = trie.contains(word);
            sum += System.currentTimeMillis() - d;
            if (!found) {
                System.out.println("trie not contains [" + word + "]");
                break;
            }
            if (c % 100000 == 0) {
                System.out.println(c + " elements done.");
            }
            c++;
        }
    } finally {
        // Release the file handle even when reading or lookup throws.
        r.close();
    }
    System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
    System.out.println("contains time: " + sum + " millis.");
    System.out.println(trie.getRoot().getChildren().length + "children in root");
    final PatriciaTrie t = trie;
    // NOTE(review): presumably keeps the trie reachable for a while after this
    // method returns (e.g. for memory inspection) - confirm intent.
    new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of silently dropping it.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
    //*/
}
Use of org.trie4j.bytes.PatriciaTrie in project trie4j by takawitter.
Class Test, method main.
/**
 * Entry point: prints a banner, then passes a newly created
 * {@code PatriciaTrie} to {@code go}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception whatever {@code go} propagates
 */
public static void main(String[] args) throws Exception {
    System.out.println("--- patricia trie ---");
    final PatriciaTrie patricia = new PatriciaTrie();
    go(patricia);
    // Hash-set based variant, currently disabled:
    // System.out.println("--- hash trie ---");
    // go(new HashSetTrie());
}
Use of org.trie4j.bytes.PatriciaTrie in project trie4j by takawitter.
Class TestWikipedia, method main.
/**
 * Loads titles from a gzipped Wikipedia title dump into a byte-based
 * {@code PatriciaTrie}, printing progress, free-memory and timing figures,
 * and then calls {@code investigate} on the populated trie.
 *
 * <p>At most {@code maxCount} lines are inserted. Each line is converted to
 * UTF-8 bytes before insertion; only the {@code insert} call itself is
 * included in the reported "insert time".
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the title file cannot be read or a sleep is interrupted
 */
public static void main(String[] args) throws Exception {
    System.out.println("--- recursive patricia trie ---");
    PatriciaTrie trie = new org.trie4j.bytes.PatriciaTrie();
    int c = 0;
    long sum = 0;
    int charCount = 0;
    // You can download archive from http://dumps.wikimedia.org/jawiki/latest/
    // Alternative input: "jawiki-20120220-all-titles-in-ns0.gz"
    BufferedReader r = new BufferedReader(new InputStreamReader(
            new GZIPInputStream(new FileInputStream("enwiki-20120403-all-titles-in-ns0.gz")),
            CharsetUtil.newUTF8Decoder()));
    try {
        String word = null;
        System.gc();
        Thread.sleep(1000);
        System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
        long lap = System.currentTimeMillis();
        while ((word = r.readLine()) != null) {
            byte[] bytes = word.getBytes("UTF-8");
            long d = System.currentTimeMillis();
            trie.insert(bytes);
            sum += System.currentTimeMillis() - d;
            charCount += word.length();
            if (c % 100000 == 0) {
                // CSV-style progress row: count, free memory, max memory, millis since last row.
                d = System.currentTimeMillis() - lap;
                long free = Runtime.getRuntime().freeMemory();
                System.out.println(c + "," + free + "," + Runtime.getRuntime().maxMemory() + "," + d);
                lap = System.currentTimeMillis();
            }
            c++;
            if (c == maxCount)
                break;
        }
    } finally {
        // Close the reader even if insertion or the sleep throws; investigate()
        // opens the same file again afterwards.
        r.close();
    }
    System.out.println(c + "entries in ja wikipedia titles.");
    System.out.println("insert time: " + sum + " millis.");
    System.out.println("-- insert done.");
    System.gc();
    Thread.sleep(1000);
    System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
    investigate(trie, charCount);
    /*
    // dump(trie);
    System.out.println("-- pack");
    lap = System.currentTimeMillis();
    if(trie instanceof MultilayerPatriciaTrie){
    MultilayerPatriciaTrie mt = (MultilayerPatriciaTrie)trie;
    mt.pack();
    System.out.println("-- pack done in " + (System.currentTimeMillis() - lap) + " millis.");
    // dump(trie);
    System.gc();
    Thread.sleep(1000);
    System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
    investigate(mt, charCount);
    }
    //*/
}
Aggregations