use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class AbstractWikipediaTest method test.
/**
 * Inserts every Wikipedia title into the trie returned by {@code createFirstTrie()},
 * converts it with {@code buildSecondTrie(trie)}, and checks that the second trie
 * reports {@code contains(word)} for every title. Timings for each phase are printed
 * to standard output.
 *
 * @throws Exception if an insert fails (the first trie is dumped before rethrowing)
 *         or if any other step throws
 * @throws AssertionError if the second trie does not contain one of the titles
 */
@Test
public void test() throws Exception {
    Trie trie = createFirstTrie();
    System.out.println("building first trie: " + trie.getClass().getName());
    int c = 0, chars = 0;
    // Accumulated insert time in nanoseconds; only the insert call itself is timed.
    long b = 0;
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        try {
            t.reset();
            trie.insert(word);
            b += t.lapNanos();
        } catch (Exception e) {
            System.out.println("exception at " + c + "th word: " + word);
            // PrintWriter(OutputStream) buffers without auto-flush; flush explicitly so the
            // diagnostic dump is not lost when the exception propagates.
            PrintWriter out = new PrintWriter(System.out);
            trie.dump(out);
            out.flush();
            throw e;
        }
        c++;
        chars += word.length();
    }
    System.out.println(String.format("done in %d millis with %d words and %d chars.", (b / 1000000), c, chars));
    t.reset();
    Trie second = buildSecondTrie(trie);
    long d = t.lapMillis();
    System.out.println(second.getClass().getName());
    System.out.println("done in " + d + "millis.");
    System.out.println("verifying trie.");
    // Accumulated contains() time in nanoseconds.
    long sum = 0;
    c = 0;
    // Set to the failure message when a title is missing, so the test fails after the summary.
    String failure = null;
    for (String word : new WikipediaTitles()) {
        t.reset();
        boolean found = second.contains(word);
        sum += t.lapNanos();
        c++;
        if (!found) {
            failure = String.format("verification failed. trie not contains %d th word: [%s].", c, word);
            System.out.println(failure);
            break;
        }
    }
    System.out.println("done in " + (sum / 1000000) + " millis with " + c + " words.");
    if (failure != null) {
        // Previously a miss was only printed and the test still passed.
        throw new AssertionError(failure);
    }
    afterVerification(second);
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class TestWikipedia method main.
/**
 * Benchmark entry point: inserts Wikipedia titles (stopping once {@code maxCount} words
 * have been inserted) into a TailPatriciaTrie, converts it to a DoubleArray, trims it,
 * and dumps the resulting trie to standard output. Timings are printed for both builds.
 *
 * @param args unused
 * @throws Exception if building or dumping the trie fails
 */
public static void main(String[] args) throws Exception {
    // Trie trie = new org.trie4j.patricia.simple.PatriciaTrie();
    // Trie trie = new org.trie4j.patricia.multilayer.MultilayerPatriciaTrie();
    Trie trie = new org.trie4j.patricia.TailPatriciaTrie(new ConcatTailBuilder());
    LapTimer t = new LapTimer();
    {
        System.out.println("-- building first trie: " + trie.getClass().getName());
        int c = 0;
        int charCount = 0;
        // Accumulated insert time in nanoseconds. The original summed lapMillis() and then
        // divided by 1,000,000, which always reported ~0 millis.
        long sum = 0;
        for (String word : new WikipediaTitles()) {
            t.reset();
            trie.insert(word);
            sum += t.lapNanos();
            charCount += word.length();
            c++;
            if (c == maxCount) {
                break;
            }
        }
        System.out.println(String.format("-- done in %d millis with %d entries, %d chars", sum / 1000000, c, charCount));
    }
    {
        System.out.println("-- building second trie.");
        t.reset();
        trie = new org.trie4j.doublearray.DoubleArray(trie, 65536);
        // trie = new org.trie4j.doublearray.TailDoubleArray(trie, 65536, new ConcatTailBuilder());
        // trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new ConcatTailBuilder());
        // trie = new org.trie4j.louds.LOUDSTrie(trie, 65536, new SuffixTrieTailBuilder());
        trie.trimToSize();
        // lapMillis() is already in milliseconds; the original divided it by 1,000,000 again.
        System.out.println(String.format("-- done in %d millis.", t.lapMillis()));
        System.gc();
        System.gc();
        // NOTE(review): the message below is printed but the sleep is commented out.
        System.out.println("waiting 10 seconds.");
        // Thread.sleep(10000);
    }
    System.out.println("-- dump trie.");
    // PrintWriter(OutputStream) buffers without auto-flush; flush so the dump is fully written.
    PrintWriter out = new PrintWriter(System.out);
    trie.dump(out);
    out.flush();
    return;
    /*
    System.out.println("-- traversing trie.");
    final AtomicInteger cnt = new AtomicInteger();
    trie.traverse(new NodeVisitor() {
    @Override
    public boolean visit(Node node, int nest) {
    if(node instanceof InternalCharsNode){
    if(((InternalCharsNode)node).getChildren().length == 1){
    cnt.incrementAndGet();
    }
    }
    return true;
    }
    });
    System.out.println(cnt + " nodes have 1 child.");
    // investigate(trie, charCount);
    //*
    // dump(trie);
    System.out.println("-- pack");
    t.lap();
    if(trie instanceof MultilayerPatriciaTrie){
    MultilayerPatriciaTrie mt = (MultilayerPatriciaTrie)trie;
    mt.pack();
    System.out.println("-- pack done in " + (t.lap() / 1000000) + " millis.");
    // dump(trie);
    System.gc();
    Thread.sleep(1000);
    System.out.println(Runtime.getRuntime().freeMemory() + " bytes free.");
    investigate(mt);
    }
    //*/
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class TestIO method testSave.
/**
 * Builds a TailPatriciaTrie from the Wikipedia titles (stopping once {@code maxCount}
 * words have been inserted), converts it into a TailDoubleArray, and saves that double
 * array gzip-compressed to the file "da.dat". Progress and timings go to standard output,
 * and the double array is dumped there after saving.
 *
 * @throws Exception if building, saving, or dumping fails
 */
@Test
public void testSave() throws Exception {
    System.out.println("--- building patricia trie ---");
    Trie patricia = new org.trie4j.patricia.TailPatriciaTrie();
    int inserted = 0;
    LapTimer timer = new LapTimer();
    for (String title : new WikipediaTitles()) {
        patricia.insert(title);
        if (++inserted == maxCount) {
            break;
        }
    }
    long buildMillis = timer.lapMillis();
    System.out.println("done in " + buildMillis + " millis.");
    System.out.println(inserted + "entries in ja wikipedia titles.");

    System.out.println("-- building double array.");
    timer.reset();
    TailDoubleArray doubleArray = new TailDoubleArray(patricia);
    // Drop the reference to the source trie once the double array has been built.
    patricia = null;
    long convertMillis = timer.lapMillis();
    System.out.println("done in " + convertMillis + " millis.");

    OutputStream gzipOut = new GZIPOutputStream(new FileOutputStream("da.dat"));
    try {
        System.out.println("-- saving double array.");
        timer.reset();
        doubleArray.save(gzipOut);
        long saveMillis = timer.lapMillis();
        System.out.println("done in " + saveMillis + " millis.");
        PrintWriter console = new PrintWriter(System.out);
        doubleArray.dump(console);
    } finally {
        // Always release the file, even if saving or dumping throws.
        gzipOut.close();
    }
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class TestWikipedia method main.
/**
 * Demo entry point: inserts Wikipedia titles (stopping once {@code maxCount} words have
 * been inserted) into a PatriciaTrie, counts its nodes with a breadth-first traversal,
 * dumps and verifies it, then prints the results of a few common-prefix and predictive
 * searches.
 *
 * @param args unused
 * @throws Exception if any trie operation or the final sleep fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("--- building patricia trie ---");
    Trie source = new PatriciaTrie();
    // Trie source = new TailPatriciaTrie(new ConcatTailBuilder());
    int inserted = 0;
    LapTimer timer = new LapTimer();
    for (String title : new WikipediaTitles()) {
        source.insert(title);
        if (++inserted == maxCount) {
            break;
        }
    }
    long buildMillis = timer.lapMillis();
    System.out.println("done in " + buildMillis + " millis.");
    System.out.println(inserted + "entries in ja wikipedia titles.");

    System.out.println("-- building double array.");
    timer.reset();
    // Trie da = new TailDoubleArray(source, 65536, new ConcatTailBuilder());
    // Trie da = new DoubleArray(source, 65536);
    // Currently the patricia trie itself is used; no conversion takes place.
    Trie da = source;
    source = null;
    long convertMillis = timer.lapMillis();
    System.out.println("done in " + convertMillis + " millis.");

    final AtomicInteger nodeCount = new AtomicInteger();
    NodeVisitor counter = new NodeVisitor() {
        @Override
        public boolean visit(Node node, int nest) {
            nodeCount.incrementAndGet();
            // Returning true on every call, as the original visitor did.
            return true;
        }
    };
    Algorithms.traverseByBreadth(da.getRoot(), counter);
    System.out.println(nodeCount + " nodes in trie.");

    PrintWriter console = new PrintWriter(System.out);
    da.dump(console);
    verify(da);

    System.out.println("---- common prefix search ----");
    System.out.println("-- for 東京国際フォーラム");
    for (String hit : da.commonPrefixSearch("東京国際フォーラム")) {
        System.out.println(hit);
    }
    System.out.println("-- for 大阪城ホール");
    for (String hit : da.commonPrefixSearch("大阪城ホール")) {
        System.out.println(hit);
    }

    System.out.println("---- predictive search ----");
    System.out.println("-- for 大阪城");
    for (String hit : da.predictiveSearch("大阪城")) {
        System.out.println(hit);
    }

    System.out.println("---- done ----");
    Thread.sleep(10000);
    da.contains("hello");
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class TestWikipedia method verify.
/**
 * Checks that {@code da} contains each Wikipedia title, examining at most
 * {@code maxCount} words and stopping at the first miss. Prints the overall elapsed
 * time and the time spent inside {@code contains} calls.
 *
 * @param da the trie to verify
 * @throws Exception if iterating the titles or querying the trie fails
 */
private static void verify(Trie da) throws Exception {
    System.out.println("verifying double array...");
    int c = 0;
    // Accumulate in nanoseconds: summing per-call lapMillis() into an int truncated every
    // sub-millisecond lookup to 0, so the reported contains time was meaningless.
    long containsNanos = 0;
    LapTimer t1 = new LapTimer(); // measures the whole verification pass
    LapTimer t = new LapTimer();  // measures each contains() call
    for (String word : new WikipediaTitles()) {
        if (c == maxCount) {
            break;
        }
        t.reset();
        boolean found = da.contains(word);
        containsNanos += t.lapNanos();
        c++;
        if (!found) {
            System.out.println("verification failed. trie not contains " + c + " th word: [" + word + "]");
            break;
        }
    }
    System.out.println("done " + c + "words in " + t1.lapMillis() + " millis.");
    System.out.println("contains time: " + (containsNanos / 1000000) + " millis.");
}
Aggregations