Search in sources :

Example 16 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class TestWikipedia method investigate.

/**
 * Prints diagnostic information about {@code trie} to standard output.
 *
 * <p>In order, this method: dumps the first letter of each child of the root,
 * counts terminal nodes, reports terminal vs. non-terminal node counts, looks
 * up up to {@code maxCount} Wikipedia titles through
 * {@link Algorithms#contains} while timing the lookups, prints the tail
 * length for a {@link TailPatriciaTrie}, and finally starts a background
 * thread that touches the trie after a long sleep.
 *
 * @param trie the trie to inspect
 * @throws Exception if any of the called trie or title-iteration operations fails
 */
@SuppressWarnings("unused")
private static void investigate(Trie trie) throws Exception {
    System.out.println("-- dump root children.");
    for (Node child : trie.getRoot().getChildren()) {
        System.out.print(child.getLetters()[0]);
    }
    System.out.println();
    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                count.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println(count.intValue() + " elements.");
    // Comment toggle: changing the next marker from "//*" to "/*" disables
    // everything down to the closing "//*/" marker. Use only line comments
    // inside this region so the toggle keeps working.
    //*
    System.out.println("-- list elements.");
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // NOTE(review): ln and chars are never updated below, so the
    // "label node" and "total char count" lines always print 0.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count in trie: " + chars.intValue());
    System.out.println("verifying trie...");
    long lap = System.currentTimeMillis();
    int c = 0;
    // Accumulated as long: the per-lookup difference is a long, and "+=" on an
    // int would silently narrow it.
    long sum = 0;
    for (String word : new WikipediaTitles()) {
        if (c == maxCount) {
            break;
        }
        long d = System.currentTimeMillis();
        //trie.contains(word);
        boolean found = Algorithms.contains(trie.getRoot(), word);
        sum += System.currentTimeMillis() - d;
        if (!found) {
            System.out.println("trie not contains [" + word + "]");
            break;
        }
        if (c % 100000 == 0) {
            System.out.println(c + " elements done.");
        }
        c++;
    }
    System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
    System.out.println("contains time: " + sum + " millis.");
    //		System.out.println(trie.getRoot().getChildren().length + "children in root");
    if (trie instanceof TailPatriciaTrie) {
        //			((TailPatriciaTrie) trie).pack();
        System.out.println("tail length: " + ((TailPatriciaTrie) trie).getTailBuilder().getTails().length());
    }
    // The anonymous Runnable can only capture a final local.
    final Trie t = trie;
    // NOTE(review): presumably this keeps the trie reachable for a while after
    // the method returns (e.g. for heap inspection) -- confirm the intent.
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of swallowing the interruption.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
//*/
}
Also used : WikipediaTitles(org.trie4j.test.WikipediaTitles) TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Example 17 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class AbstractSetWikipediaSerializeTest method test.

/**
 * Round-trips the set returned by {@code set()}, filled with the Wikipedia
 * titles, through Java object serialization in memory, then checks that the
 * deserialized set still contains every title and prints a one-line summary
 * (class name, serialized size, and write/read/verify durations).
 *
 * @throws Exception if populating, serializing, deserializing or verifying fails
 */
@SuppressWarnings("unchecked")
@Test
public void test() throws Exception {
    WikipediaTitles titles = new WikipediaTitles();
    Set<String> source = titles.insertTo(set());

    // Write phase: only writeObject + flush fall between timer creation and the lap.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream objectOut = new ObjectOutputStream(buffer);
    LapTimer timer = new LapTimer();
    objectOut.writeObject(source);
    objectOut.flush();
    long writeMillis = timer.lapMillis();
    byte[] serialized = buffer.toByteArray();

    // Read phase: stream construction and readObject are both inside the lap.
    timer.reset();
    ObjectInputStream objectIn = new ObjectInputStream(new ByteArrayInputStream(serialized));
    Set<String> restored = (Set<String>) objectIn.readObject();
    long readMillis = timer.lapMillis();

    // Verify phase: the value returned by assertAllContains is reported as verify(ms).
    long verifyMillis = titles.assertAllContains(restored);

    String typeName = source.getClass().getSimpleName();
    String report = String.format(
            "%s%s, size: %d, write(ms): %d, read(ms): %d, verify(ms): %d.",
            typeName, "", serialized.length, writeMillis, readMillis, verifyMillis);
    System.out.println(report);
}
Also used : Set(java.util.Set) ByteArrayInputStream(java.io.ByteArrayInputStream) WikipediaTitles(org.trie4j.test.WikipediaTitles) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) LapTimer(org.trie4j.test.LapTimer) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test)

Aggregations

WikipediaTitles (org.trie4j.test.WikipediaTitles) 17, LapTimer (org.trie4j.test.LapTimer) 12, Test (org.junit.Test) 8, TailPatriciaTrie (org.trie4j.patricia.TailPatriciaTrie) 7, PrintWriter (java.io.PrintWriter) 5, Trie (org.trie4j.Trie) 5, ByteArrayInputStream (java.io.ByteArrayInputStream) 4, ByteArrayOutputStream (java.io.ByteArrayOutputStream) 4, ObjectOutputStream (java.io.ObjectOutputStream) 4, TailLOUDSTrie (org.trie4j.louds.TailLOUDSTrie) 4, FileOutputStream (java.io.FileOutputStream) 3, ObjectInputStream (java.io.ObjectInputStream) 3, OutputStream (java.io.OutputStream) 3, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 3, PatriciaTrie (org.trie4j.patricia.PatriciaTrie) 3, Node (org.trie4j.Node) 2, NodeVisitor (org.trie4j.NodeVisitor) 2, BytesSuccinctBitVector (org.trie4j.bv.BytesSuccinctBitVector) 2, ConcatTailBuilder (org.trie4j.tail.builder.ConcatTailBuilder) 2, DataOutputStream (java.io.DataOutputStream) 1