Search in sources:

Example 1 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

Method test of class AbstractWikipediaTest.

/**
 * Inserts every word yielded by {@code WikipediaTitles} into the trie returned
 * by {@code createFirstTrie()}, converts it with {@code buildSecondTrie(...)},
 * and checks that the converted trie contains every word.
 *
 * <p>Only the {@code insert} and {@code contains} calls themselves are timed;
 * iteration over the titles is excluded from the reported durations.
 *
 * @throws Exception if an insertion fails (the first trie is dumped to
 *         standard output before the exception is rethrown)
 * @throws AssertionError if the second trie does not contain one of the words
 */
@Test
public void test() throws Exception {
    Trie trie = createFirstTrie();
    System.out.println("building first trie: " + trie.getClass().getName());
    int c = 0, chars = 0;
    long b = 0;
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        try {
            t.reset();
            trie.insert(word);
            b += t.lapNanos();
        } catch (Exception e) {
            System.out.println("exception at " + c + "th word: " + word);
            // A PrintWriter over System.out does not auto-flush, so flush
            // explicitly to make sure the dump is visible before rethrowing.
            PrintWriter out = new PrintWriter(System.out);
            trie.dump(out);
            out.flush();
            throw e;
        }
        c++;
        chars += word.length();
    }
    System.out.println(String.format("done in %d millis with %d words and %d chars.", (b / 1000000), c, chars));
    t.reset();
    Trie second = buildSecondTrie(trie);
    long d = t.lapMillis();
    System.out.println(second.getClass().getName());
    System.out.println("done in " + d + "millis.");
    System.out.println("verifying trie.");
    long sum = 0;
    c = 0;
    // Remember the first failure so the timing summary is still printed
    // before the test is failed.
    String failure = null;
    for (String word : new WikipediaTitles()) {
        t.reset();
        boolean found = second.contains(word);
        sum += t.lapNanos();
        c++;
        if (!found) {
            failure = String.format("verification failed.  trie not contains %d th word: [%s].", c, word);
            System.out.println(failure);
            break;
        }
    }
    System.out.println("done in " + (sum / 1000000) + " millis with " + c + " words.");
    if (failure != null) {
        // Previously the failure was only printed, so the test passed anyway.
        throw new AssertionError(failure);
    }
    afterVerification(second);
}
Also used : WikipediaTitles(org.trie4j.test.WikipediaTitles) TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) LapTimer(org.trie4j.test.LapTimer) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 2 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

Method main of class TestWikipedia.

/**
 * Builds a {@code TailPatriciaTrie} from Wikipedia titles (stopping once
 * {@code maxCount} words have been inserted), converts it to a
 * {@code DoubleArray}, and dumps the result to standard output.
 *
 * <p>Alternative implementations that can be swapped in by hand:
 * {@code org.trie4j.patricia.simple.PatriciaTrie} and
 * {@code org.trie4j.patricia.multilayer.MultilayerPatriciaTrie} for the first
 * trie; {@code TailDoubleArray} or {@code LOUDSTrie} (with a
 * {@code ConcatTailBuilder} or {@code SuffixTrieTailBuilder}) for the second.
 *
 * @param args unused
 * @throws Exception if building or dumping a trie fails
 */
public static void main(String[] args) throws Exception {
    Trie trie = new org.trie4j.patricia.TailPatriciaTrie(new ConcatTailBuilder());
    LapTimer t = new LapTimer();
    {
        System.out.println("-- building first trie: " + trie.getClass().getName());
        int c = 0;
        int charCount = 0;
        // Accumulate in nanoseconds: a single insert is far shorter than a
        // millisecond, and the total is converted to millis once when printed.
        long sumNanos = 0;
        for (String word : new WikipediaTitles()) {
            t.reset();
            trie.insert(word);
            sumNanos += t.lapNanos();
            charCount += word.length();
            c++;
            if (c == maxCount)
                break;
        }
        System.out.println(String.format("-- done in %d millis with %d entries, %d chars", sumNanos / 1000000, c, charCount));
    }
    {
        System.out.println("-- building second trie.");
        t.reset();
        trie = new org.trie4j.doublearray.DoubleArray(trie, 65536);
        trie.trimToSize();
        // lapMillis() is already in milliseconds; it must not be divided again.
        System.out.println(String.format("-- done in %d millis.", t.lapMillis()));
        System.gc();
        System.gc();
        // NOTE: the actual 10 second sleep is currently disabled.
        System.out.println("waiting 10 seconds.");
    }
    System.out.println("-- dump trie.");
    // A PrintWriter over System.out does not auto-flush; flush explicitly so
    // the dump is not lost when the JVM exits.
    PrintWriter out = new PrintWriter(System.out);
    trie.dump(out);
    out.flush();
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) ConcatTailBuilder(org.trie4j.tail.builder.ConcatTailBuilder) WikipediaTitles(org.trie4j.test.WikipediaTitles) TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) LapTimer(org.trie4j.test.LapTimer) PrintWriter(java.io.PrintWriter)

Example 3 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

Method testSave of class TestIO.

/**
 * Builds a {@code TailPatriciaTrie} from Wikipedia titles (stopping once
 * {@code maxCount} words have been inserted), converts it to a
 * {@code TailDoubleArray}, and saves that double array gzip-compressed to
 * the file {@code da.dat}, printing the elapsed time of each phase.
 *
 * @throws Exception if building the tries or writing the file fails
 */
@Test
public void testSave() throws Exception {
    System.out.println("--- building patricia trie ---");
    Trie source = new org.trie4j.patricia.TailPatriciaTrie();
    int inserted = 0;
    LapTimer timer = new LapTimer();
    for (String title : new WikipediaTitles()) {
        source.insert(title);
        if (++inserted == maxCount) {
            break;
        }
    }
    long buildMillis = timer.lapMillis();
    System.out.println("done in " + buildMillis + " millis.");
    System.out.println(inserted + "entries in ja wikipedia titles.");

    System.out.println("-- building double array.");
    timer.reset();
    TailDoubleArray doubleArray = new TailDoubleArray(source);
    // Drop the only reference to the source trie once it has been converted.
    source = null;
    long convertMillis = timer.lapMillis();
    System.out.println("done in " + convertMillis + " millis.");

    OutputStream gzipOut = new GZIPOutputStream(new FileOutputStream("da.dat"));
    try {
        System.out.println("-- saving double array.");
        timer.reset();
        doubleArray.save(gzipOut);
        long saveMillis = timer.lapMillis();
        System.out.println("done in " + saveMillis + " millis.");
        doubleArray.dump(new PrintWriter(System.out));
    } finally {
        gzipOut.close();
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) FileOutputStream(java.io.FileOutputStream) WikipediaTitles(org.trie4j.test.WikipediaTitles) Trie(org.trie4j.Trie) LapTimer(org.trie4j.test.LapTimer) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 4 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

Method main of class TestWikipedia.

/**
 * Builds a {@code PatriciaTrie} from Wikipedia titles (stopping once
 * {@code maxCount} words have been inserted), counts its nodes with a
 * breadth-first traversal, dumps and verifies it, and then prints the results
 * of a few common-prefix and predictive searches.
 *
 * <p>The trie is currently used directly as the search target; a
 * {@code TailDoubleArray} or {@code DoubleArray} built from it can be
 * substituted by hand.
 *
 * @param args unused
 * @throws Exception if building, verifying, or searching the trie fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("--- building patricia trie ---");
    Trie patricia = new PatriciaTrie();
    int inserted = 0;
    LapTimer timer = new LapTimer();
    for (String title : new WikipediaTitles()) {
        patricia.insert(title);
        if (++inserted == maxCount) {
            break;
        }
    }
    long buildMillis = timer.lapMillis();
    System.out.println("done in " + buildMillis + " millis.");
    System.out.println(inserted + "entries in ja wikipedia titles.");

    System.out.println("-- building double array.");
    timer.reset();
    Trie da = patricia;
    patricia = null;
    long convertMillis = timer.lapMillis();
    System.out.println("done in " + convertMillis + " millis.");

    // Count every node the breadth-first traversal visits.
    final AtomicInteger nodeCount = new AtomicInteger();
    NodeVisitor counter = new NodeVisitor() {

        @Override
        public boolean visit(Node node, int nest) {
            nodeCount.incrementAndGet();
            return true;
        }
    };
    Algorithms.traverseByBreadth(da.getRoot(), counter);
    System.out.println(nodeCount + " nodes in trie.");

    da.dump(new PrintWriter(System.out));
    verify(da);

    System.out.println("---- common prefix search ----");
    String[] prefixQueries = { "東京国際フォーラム", "大阪城ホール" };
    for (String query : prefixQueries) {
        System.out.println("-- for " + query);
        for (String hit : da.commonPrefixSearch(query)) {
            System.out.println(hit);
        }
    }

    System.out.println("---- predictive search ----");
    String predictiveQuery = "大阪城";
    System.out.println("-- for " + predictiveQuery);
    for (String hit : da.predictiveSearch(predictiveQuery)) {
        System.out.println(hit);
    }

    System.out.println("---- done ----");
    Thread.sleep(10000);
    // The trie is still referenced here, after the pause.
    da.contains("hello");
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) Node(org.trie4j.Node) WikipediaTitles(org.trie4j.test.WikipediaTitles) Trie(org.trie4j.Trie) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) LapTimer(org.trie4j.test.LapTimer) NodeVisitor(org.trie4j.NodeVisitor) PrintWriter(java.io.PrintWriter)

Example 5 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

Method verify of class TestWikipedia.

/**
 * Checks that {@code da} contains each word yielded by {@code WikipediaTitles},
 * stopping after {@code maxCount} words or at the first missing word, and
 * prints the overall elapsed time and the time spent inside {@code contains}.
 *
 * @param da the trie to verify
 * @throws Exception if reading the titles or querying the trie fails
 */
private static void verify(Trie da) throws Exception {
    System.out.println("verifying double array...");
    int c = 0;
    // Accumulate in nanoseconds: summing per-call milliseconds truncates
    // every sub-millisecond lookup to zero and under-reports the total.
    long sumNanos = 0;
    LapTimer t1 = new LapTimer();
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        if (c == maxCount)
            break;
        t.reset();
        boolean found = da.contains(word);
        sumNanos += t.lapNanos();
        c++;
        if (!found) {
            System.out.println("verification failed.  trie not contains " + c + " th word: [" + word + "]");
            break;
        }
    }
    System.out.println("done " + c + "words in " + t1.lapMillis() + " millis.");
    System.out.println("contains time: " + (sumNanos / 1000000) + " millis.");
}
Also used : WikipediaTitles(org.trie4j.test.WikipediaTitles) LapTimer(org.trie4j.test.LapTimer)

Aggregations

WikipediaTitles (org.trie4j.test.WikipediaTitles)17 LapTimer (org.trie4j.test.LapTimer)12 Test (org.junit.Test)8 TailPatriciaTrie (org.trie4j.patricia.TailPatriciaTrie)7 PrintWriter (java.io.PrintWriter)5 Trie (org.trie4j.Trie)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 ObjectOutputStream (java.io.ObjectOutputStream)4 TailLOUDSTrie (org.trie4j.louds.TailLOUDSTrie)4 FileOutputStream (java.io.FileOutputStream)3 ObjectInputStream (java.io.ObjectInputStream)3 OutputStream (java.io.OutputStream)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 PatriciaTrie (org.trie4j.patricia.PatriciaTrie)3 Node (org.trie4j.Node)2 NodeVisitor (org.trie4j.NodeVisitor)2 BytesSuccinctBitVector (org.trie4j.bv.BytesSuccinctBitVector)2 ConcatTailBuilder (org.trie4j.tail.builder.ConcatTailBuilder)2 DataOutputStream (java.io.DataOutputStream)1