Search in sources :

Example 6 with LapTimer

use of org.trie4j.test.LapTimer in project trie4j by takawitter.

the class TrieWriterTest method test.

/**
 * Writes a {@code TailLOUDSTrie} built from the Wikipedia titles to an in-memory buffer with
 * {@code TrieWriter}, reads it back with {@code TrieReader}, and checks that the reloaded trie
 * still contains every title. Lap times and the serialized size are printed along the way.
 */
@Test
public void test() throws Exception {
    LapTimer timer = new LapTimer();
    PatriciaTrie source = new PatriciaTrie();
    new WikipediaTitles().insertTo(source);
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    TrieWriter writer = new TrieWriter(buffer);
    // The trie under test is constructed before the timer is reset, so only the write is timed.
    LOUDSPPBvTree bvTree = new LOUDSPPBvTree(source.nodeSize());
    SuffixTrieDenseTailArrayBuilder tailBuilder = new SuffixTrieDenseTailArrayBuilder();
    Trie written = new TailLOUDSTrie(source, bvTree, tailBuilder);

    timer.reset();
    writer.write(written);
    writer.flush();
    timer.lapMillis("trie saved.");
    System.out.println(buffer.size() + " bytes");

    byte[] serialized = buffer.toByteArray();
    TrieReader reader = new TrieReader(new ByteArrayInputStream(serialized));
    timer.reset();
    Trie restored = reader.read();
    timer.lapMillis("trie loaded.");

    // assertAllContains returns a long that is reported here as elapsed milliseconds.
    long verifyMillis = new WikipediaTitles().assertAllContains(restored);
    System.out.println("[" + verifyMillis + "ms]: verified");
}
Also used : TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) LOUDSPPBvTree(org.trie4j.louds.bvtree.LOUDSPPBvTree) ByteArrayInputStream(java.io.ByteArrayInputStream) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) SuffixTrieDenseTailArrayBuilder(org.trie4j.tail.SuffixTrieDenseTailArrayBuilder) WikipediaTitles(org.trie4j.test.WikipediaTitles) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Trie(org.trie4j.Trie) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) LapTimer(org.trie4j.test.LapTimer) Test(org.junit.Test)

Example 7 with LapTimer

use of org.trie4j.test.LapTimer in project trie4j by takawitter.

the class LZ77 method main1.

/**
 * Compresses the text read from {@code data/jawiki-20120220-tail} with {@code compress2},
 * decompresses the result, and prints lap times, sizes, the compression ratio and, if the
 * round trip is not lossless, the first differing character with a few characters of context.
 *
 * @param args unused
 * @throws Exception if reading, compressing or decompressing fails
 */
public static void main1(String[] args) throws Exception {
    LapTimer lt = new LapTimer();
    // Small sample input; immediately replaced by the real data file below.
    String src = "abcabdrz";
    src = read("data/jawiki-20120220-tail");
    //		src = readTitles("data/jawiki-20120220-all-titles-in-ns0.gz");
    int windowSize = 8192;
    StringBuilder dest = new StringBuilder();
    lt.reset();
    compress2(src, dest, windowSize);
    lt.lapMillis("compress done.");
    // Scan the second char of every 3-char group of the compressed output for its maximum
    // (presumably the match-length field of each token -- confirm against compress2).
    int l = 0;
    for (int i = 0; i < dest.length() / 3; i++) {
        l = Math.max(dest.charAt(i * 3 + 1), l);
    }
    System.out.println("max matched length: " + l);
    dump(dest);
    StringBuilder b = new StringBuilder();
    lt.reset();
    decompress(dest, b);
    lt.lapMillis("decompress done.");
    boolean eq = src.equals(b.toString());
    System.out.println(String.format("src: %d, comp: %d(%02.1f%%), decomp: %d, %b", src.length(), dest.length(), 1.0 * dest.length() / src.length() * 100, b.length(), eq));
    // Only the common prefix can be compared character by character. A faulty decompress may
    // produce fewer characters than src, and indexing b up to src.length() would then throw
    // instead of reporting the mismatch (the length difference is already printed above).
    int comparable = Math.min(src.length(), b.length());
    for (int i = 0; i < comparable; i++) {
        if (src.charAt(i) != b.charAt(i)) {
            System.out.println(String.format("%dth char different [%c:%c]", i, src.charAt(i), b.charAt(i)));
            int s = Math.max(i - 5, 0);
            // Clamp each context window to its own buffer so neither substring overruns.
            int srcEnd = Math.min(i + 5, src.length());
            int decEnd = Math.min(i + 5, b.length());
            System.out.println("src: " + src.substring(s, srcEnd));
            System.out.println("dec: " + b.substring(s, decEnd));
            break;
        }
    }
}
Also used : LapTimer(org.trie4j.test.LapTimer)

Example 8 with LapTimer

use of org.trie4j.test.LapTimer in project trie4j by takawitter.

the class LZ77 method main2.

/**
 * Runs {@code compress1} and {@code compress2} over the same input with the same window size,
 * prints the lap time and compression ratio of each, and dumps both outputs at the first
 * position where they differ.
 *
 * @param args unused
 * @throws Exception if reading or compressing fails
 */
public static void main2(String[] args) throws Exception {
    LapTimer timer = new LapTimer();
    String text = "abcabdrz";
    text = read("data/jawiki-20120220-tail");
    int windowSize = 8192;
    System.out.println("total " + text.length() + " chars. windowSize: " + windowSize);

    StringBuilder first = new StringBuilder();
    timer.reset();
    compress1(text, first, windowSize);
    timer.lapMillis("compress1 done.");

    StringBuilder second = new StringBuilder();
    timer.reset();
    compress2(text, second, windowSize);
    timer.lapMillis("compress2 done.");

    double firstPercent = 1.0 * first.length() / text.length() * 100;
    double secondPercent = 1.0 * second.length() / text.length() * 100;
    System.out.println(String.format("src: %d, comp1: %d(%02.1f%%)", text.length(), first.length(), firstPercent));
    System.out.println(String.format("src: %d, comp2: %d(%02.1f%%)", text.length(), second.length(), secondPercent));

    // Compare the two outputs over their common prefix and stop at the first difference.
    int limit = Math.min(first.length(), second.length());
    int pos = 0;
    while (pos < limit) {
        char a = first.charAt(pos);
        char b = second.charAt(pos);
        if (a != b) {
            System.out.println(String.format("%dth char different [%s:%s]", pos, toString(a), toString(b)));
            dump(first, second);
            break;
        }
        pos++;
    }
}
Also used : LapTimer(org.trie4j.test.LapTimer)

Example 9 with LapTimer

use of org.trie4j.test.LapTimer in project trie4j by takawitter.

the class AbstractMapTrieWikipediaSerializeTest method test.

/**
 * Serializes the trie under test with Java object serialization into a byte array,
 * deserializes it again, verifies that the copy contains all Wikipedia titles, and prints the
 * serialized size together with the write, read and verify durations.
 */
@Test
@SuppressWarnings("unchecked")
public void test() throws Exception {
    WikipediaTitles titles = new WikipediaTitles();
    MapTrie<Integer> trie = titles.insertTo(newTrie());
    trie = buildSecondTrie(trie);

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(buffer);
    LapTimer timer = new LapTimer();
    out.writeObject(trie);
    out.flush();
    long writeMillis = timer.lapMillis();

    byte[] serialized = buffer.toByteArray();
    timer.reset();
    // Stream construction stays inside the timed region, as it reads the stream header.
    ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(serialized));
    MapTrie<Integer> restored = (MapTrie<Integer>) in.readObject();
    long readMillis = timer.lapMillis();

    long verifyMillis = titles.assertAllContains(restored);

    String trieName = trie.getClass().getSimpleName();
    String bvTreeName = getBvTreeClassName(trie);
    String tailName = getTailClassName(trie);
    System.out.println(String.format("%s%s%s, size: %d, write(ms): %d, read(ms): %d, verify(ms): %d.", trieName, bvTreeName, tailName, serialized.length, writeMillis, readMillis, verifyMillis));
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) WikipediaTitles(org.trie4j.test.WikipediaTitles) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) LapTimer(org.trie4j.test.LapTimer) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test)

Example 10 with LapTimer

use of org.trie4j.test.LapTimer in project trie4j by takawitter.

the class BitVectorExp method main.

/**
 * Builds a {@code PatriciaTrie} from up to {@code maxCount} Wikipedia titles, encodes its
 * shape into a {@code BytesSuccinctBitVector}, then exercises {@code rank} and {@code select}
 * on that vector, printing a lap time after each phase.
 *
 * @param args unused
 * @throws Exception if loading the titles fails or the final sleep is interrupted
 */
public static void main(String[] args) throws Exception {
    Trie trie = new PatriciaTrie();
    int c = 0;
    // You can download archive from http://dumps.wikimedia.org/jawiki/latest/
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        trie.insert(word);
        c++;
        if (c == maxCount) {
            break;
        }
    }
    t.lapMillis("trie building done. %d words.", c);
    final BytesSuccinctBitVector bv = new BytesSuccinctBitVector(5000000);
    final AtomicInteger nodeCount = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        @Override
        public boolean visit(Node node, int nest) {
            // For each visited node: one set bit per child, followed by one clear bit.
            Node[] children = node.getChildren();
            if (children != null) {
                // Reuse the array already fetched instead of calling getChildren() again.
                int n = children.length;
                for (int i = 0; i < n; i++) {
                    bv.append(true);
                }
            }
            bv.append(false);
            nodeCount.incrementAndGet();
            return true;
        }
    });
    // Drop the only reference to the source trie before the rank/select phase.
    trie = null;
    t.lapMillis("done. %d nodes inserted. do rank and select", nodeCount.intValue());
    // Sample every 100th position up to the word count; results are discarded.
    for (int i = 0; i < c; i += 100) {
        int count = bv.rank(i, true);
        bv.select(count, true);
    }
    t.lapMillis("done.");
    // NOTE(review): presumably keeps the JVM alive so memory can be inspected -- confirm.
    Thread.sleep(10000);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) Node(org.trie4j.Node) BytesSuccinctBitVector(org.trie4j.bv.BytesSuccinctBitVector) WikipediaTitles(org.trie4j.test.WikipediaTitles) Trie(org.trie4j.Trie) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) LapTimer(org.trie4j.test.LapTimer) NodeVisitor(org.trie4j.NodeVisitor)

Aggregations

LapTimer (org.trie4j.test.LapTimer): 16, WikipediaTitles (org.trie4j.test.WikipediaTitles): 12, Test (org.junit.Test): 8, PrintWriter (java.io.PrintWriter): 6, ByteArrayInputStream (java.io.ByteArrayInputStream): 4, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 4, Trie (org.trie4j.Trie): 4, ObjectInputStream (java.io.ObjectInputStream): 3, ObjectOutputStream (java.io.ObjectOutputStream): 3, PatriciaTrie (org.trie4j.patricia.PatriciaTrie): 3, TailPatriciaTrie (org.trie4j.patricia.TailPatriciaTrie): 3, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2, Node (org.trie4j.Node): 2, NodeVisitor (org.trie4j.NodeVisitor): 2, TailLOUDSTrie (org.trie4j.louds.TailLOUDSTrie): 2, FileInputStream (java.io.FileInputStream): 1, FileOutputStream (java.io.FileOutputStream): 1, OutputStream (java.io.OutputStream): 1, Set (java.util.Set): 1, GZIPInputStream (java.util.zip.GZIPInputStream): 1