Search in sources :

Example 1 with TrieVisitor

use of org.trie4j.bytes.TrieVisitor in project trie4j by takawitter.

the class TestWikipedia method investigate.

/**
 * Prints statistics about {@code trie} and then checks that every title read from the
 * gzipped Wikipedia title dump is contained in it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>visit all nodes and print how many are marked as terminal;</li>
 *   <li>visit all nodes again, printing terminal/non-terminal node counts and the total
 *       number of letters stored in the nodes;</li>
 *   <li>read the title file line by line (at most {@code maxCount} lines) and look each
 *       line up with {@code trie.contains}, stopping at the first miss;</li>
 *   <li>start a background thread that touches the trie again after 100 seconds.</li>
 * </ol>
 *
 * @param trie      the trie to inspect and verify
 * @param charCount character count supplied by the caller; only echoed for comparison
 *                  with the letter count measured inside the trie
 * @throws Exception if the title file cannot be opened, read or closed
 */
private static void investigate(PatriciaTrie trie, int charCount) throws Exception {
    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    trie.visit(new TrieVisitor() {

        @Override
        public void accept(Node node, int nest) {
            if (node.isTerminate()) {
                count.incrementAndGet();
            }
        }
    });
    System.out.println(count.intValue() + " elements.");
    //*
    System.out.println("-- list elements.");
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // Never incremented in this method; still printed so the report keeps its shape.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    trie.visit(new TrieVisitor() {

        @Override
        public void accept(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            chars.addAndGet(node.getLetters().length);
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count: " + charCount);
    System.out.println("total char count in trie: " + chars.intValue());
    System.out.println("verifying trie...");
    // Alternative data set: "jawiki-20120220-all-titles-in-ns0.gz"
    FileInputStream file = new FileInputStream("enwiki-20120403-all-titles-in-ns0.gz");
    BufferedReader r = null;
    try {
        r = new BufferedReader(new InputStreamReader(new GZIPInputStream(file), CharsetUtil.newUTF8Decoder()));
        long lap = System.currentTimeMillis();
        int c = 0;
        // long, so the accumulated lookup time is not silently narrowed to int.
        long sum = 0;
        String word = null;
        while ((word = r.readLine()) != null) {
            if (c == maxCount) {
                break;
            }
            // Time only the lookup itself, not the file reading.
            long d = System.currentTimeMillis();
            boolean found = trie.contains(word);
            sum += System.currentTimeMillis() - d;
            if (!found) {
                System.out.println("trie not contains [" + word + "]");
                break;
            }
            if (c % 100000 == 0) {
                System.out.println(c + " elements done.");
            }
            c++;
        }
        System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
        System.out.println("contains time: " + sum + " millis.");
    } finally {
        // Closing the reader closes the whole stream chain; if building the chain
        // failed, fall back to closing the raw file stream so the handle is released.
        if (r != null) {
            r.close();
        } else {
            file.close();
        }
    }
    System.out.println(trie.getRoot().getChildren().length + " children in root");
    final PatriciaTrie t = trie;
    // NOTE(review): presumably keeps the trie reachable for a while after this method
    // returns (e.g. for memory inspection) - confirm the intent.
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of swallowing it.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
//*/
}
Also used : InputStreamReader(java.io.InputStreamReader) TrieVisitor(org.trie4j.bytes.TrieVisitor) Node(org.trie4j.bytes.Node) PatriciaTrie(org.trie4j.bytes.PatriciaTrie) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BufferedReader(java.io.BufferedReader)

Example 2 with TrieVisitor

use of org.trie4j.bytes.TrieVisitor in project trie4j by takawitter.

the class Test method go.

/**
 * Exercises {@code trie} with a small fixed word list and prints the results.
 *
 * <p>The empty key is inserted first, then each word; after every word the whole trie is
 * dumped with one node per line, indented by nesting depth, with a trailing {@code *} on
 * nodes marked as terminal. Afterwards the method prints lookups for inserted words, for
 * words that were not inserted, and a common-prefix search for "applejuicebar".
 *
 * @param trie the trie to fill and query
 * @throws Exception if converting to or from UTF-8 bytes fails
 */
private static void go(PatriciaTrie trie) throws Exception {
    final String[] words = { "apple", "appear", "a", "orange", "applejuice", "appletea", "appleshower", "orangejuice" };
    trie.insert("".getBytes("UTF-8"));

    // The visitor keeps no state, so one instance serves every dump below.
    final TrieVisitor dumper = new TrieVisitor() {

        @Override
        public void accept(Node node, int nest) {
            int remaining = nest;
            while (remaining > 0) {
                System.out.print(" ");
                remaining--;
            }
            final byte[] letters = node.getLetters();
            if (letters != null && letters.length != 0) {
                System.out.print(fromUTF8(letters));
            } else {
                System.out.print("<empty>");
            }
            System.out.println(node.isTerminate() ? "*" : "");
        }
    };

    for (int idx = 0; idx < words.length; idx++) {
        final String current = words[idx];
        System.out.println("insert \"" + current + "\"");
        trie.insert(current.getBytes("UTF-8"));
        System.out.println("--dump--");
        trie.visit(dumper);
    }

    System.out.println(trie.contains(""));

    System.out.println("--test contains--");
    for (int idx = 0; idx < words.length; idx++) {
        System.out.print(words[idx] + ": ");
        System.out.println(trie.contains(words[idx]));
    }

    System.out.println("--test not contains--");
    final String[] absent = { "banana", "app", "applebeer", "applejuice2" };
    for (String candidate : absent) {
        System.out.println(candidate + ": " + trie.contains(candidate));
    }

    System.out.println("-- test common prefix search --");
    System.out.println("query: applejuicebar");
    final byte[] query = "applejuicebar".getBytes("UTF-8");
    for (byte[] hit : trie.commonPrefixSearch(query)) {
        System.out.println(new String(hit, "UTF-8"));
    }
}
Also used : TrieVisitor(org.trie4j.bytes.TrieVisitor) Node(org.trie4j.bytes.Node)

Example 3 with TrieVisitor

use of org.trie4j.bytes.TrieVisitor in project trie4j by takawitter.

the class TestWikipedia method dump.

/**
 * Prints the structure of {@code trie} to standard output.
 *
 * <p>After a "--dump--" header, every visited node gets its own line: one space per
 * nesting level, then the node's letters decoded from UTF-8 (nothing when the node has
 * no letters), then {@code *} when the node is marked as terminal.
 *
 * @param trie the trie to print
 */
private static void dump(PatriciaTrie trie) {
    System.out.println("--dump--");
    final TrieVisitor printer = new TrieVisitor() {

        @Override
        public void accept(Node node, int nest) {
            // Build the indentation up front and emit it in one call.
            final StringBuilder indent = new StringBuilder();
            while (indent.length() < nest) {
                indent.append(' ');
            }
            System.out.print(indent.toString());

            final byte[] letters = node.getLetters();
            final boolean blank = letters == null || letters.length == 0;
            if (!blank) {
                System.out.print(StringUtil.fromUTF8(letters));
            }
            System.out.println(node.isTerminate() ? "*" : "");
        }
    };
    trie.visit(printer);
}
Also used : TrieVisitor(org.trie4j.bytes.TrieVisitor) Node(org.trie4j.bytes.Node)

Aggregations

Node (org.trie4j.bytes.Node): 3, TrieVisitor (org.trie4j.bytes.TrieVisitor): 3, BufferedReader (java.io.BufferedReader): 1, FileInputStream (java.io.FileInputStream): 1, InputStreamReader (java.io.InputStreamReader): 1, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1, GZIPInputStream (java.util.zip.GZIPInputStream): 1, PatriciaTrie (org.trie4j.bytes.PatriciaTrie): 1