Search in sources :

Example 1 with TailPatriciaTrie

use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.

the class SBVConcatTailArrayTest method test.

/**
 * Builds two {@code TailLOUDSTrie}s from the same source trie, one with
 * {@code SBVConcatTailArrayAppendingBuilder} and one with {@code SBVConcatTailArrayBuilder},
 * and asserts that the succinct bit vectors of their tail indexes agree: same size, same bits,
 * and the same values over the common prefix of the count cache and the index cache.
 *
 * @throws Exception if loading the titles or building either trie fails
 */
@Test
public void test() throws Exception {
    // Check whether the bit vector and its caches differ between using the plain
    // SBVConcatTailArrayIndexBuilder and using a TailArrayIndexBuilder that appends
    // on every add.
    TailPatriciaTrie org = new TailPatriciaTrie(new ConcatTailBuilder());
    new WikipediaTitles().insertTo(org);
    TailLOUDSTrie louds1 = new TailLOUDSTrie(org, new SBVConcatTailArrayAppendingBuilder());
    new WikipediaTitles().assertAllContains(louds1);
    BytesSuccinctBitVector sbv1 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds1.getTailArray()).getTailIndex()).getSbv();
    TailLOUDSTrie louds2 = new TailLOUDSTrie(org, new SBVConcatTailArrayBuilder());
    new WikipediaTitles().assertAllContains(louds2);
    BytesSuccinctBitVector sbv2 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds2.getTailArray()).getTailIndex()).getSbv();
    {
        // The bit vectors must have the same length and the same bit at every position.
        int n = sbv1.size();
        System.out.println("sbv size: " + n);
        Assert.assertEquals(n, sbv2.size());
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th bit", sbv1.get(i), sbv2.get(i));
        }
    }
    {
        int[] countCache1 = sbv1.getCountCache0();
        int[] countCache2 = sbv2.getCountCache0();
        int n = countCache1.length;
        System.out.println("countCache0 size should be: " + (sbv1.size() / 64 + 1));
        System.out.println("countCache0 size: " + n);
        // The array lengths are deliberately not asserted equal; only the entries both
        // arrays have are compared.
        n = Math.min(countCache1.length, countCache2.length);
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th count cache.", countCache1[i], countCache2[i]);
        }
    }
    {
        IntArray indexCache1 = sbv1.getIndexCache0();
        IntArray indexCache2 = sbv2.getIndexCache0();
        int n = indexCache1.size();
        System.out.println("indexCache0 size1: " + n);
        System.out.println("indexCache0 size2: " + indexCache2.size());
        // As above, sizes are not asserted equal; only the common prefix is compared.
        n = Math.min(indexCache1.size(), indexCache2.size());
        // Print at most the first 10 entries of each cache, never more than both hold,
        // so the preview cannot index past the end of a short cache.
        int preview = Math.min(10, n);
        for (int i = 0; i < preview; i++) {
            System.out.print(indexCache1.get(i) + ", ");
        }
        System.out.println();
        for (int i = 0; i < preview; i++) {
            System.out.print(indexCache2.get(i) + ", ");
        }
        System.out.println();
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th index cache.", indexCache1.get(i), indexCache2.get(i));
        }
    }
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) IntArray(org.trie4j.util.IntArray) ConcatTailBuilder(org.trie4j.tail.builder.ConcatTailBuilder) BytesSuccinctBitVector(org.trie4j.bv.BytesSuccinctBitVector) WikipediaTitles(org.trie4j.test.WikipediaTitles) Test(org.junit.Test)

Example 2 with TailPatriciaTrie

use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.

the class CreateTail method main.

/**
 * Inserts every title from the Japanese Wikipedia title dump into a {@code TailPatriciaTrie},
 * constructs a {@code TailLOUDSTrie} from it with a {@code ConcatTailArrayBuilder}, and then
 * opens and closes the output file {@code data/jawiki-20120220-tail}.
 *
 * <p>The code that would write the tails into that file is disabled, so the file is only
 * created (or truncated) and left empty.
 *
 * @param args unused
 * @throws Exception if reading the titles or opening/closing the output file fails
 */
public static void main(String[] args) throws Exception {
    TailPatriciaTrie source = new TailPatriciaTrie();
    WikipediaTitles titles = new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz");
    for (String title : titles) {
        source.insert(title);
    }
    ConcatTailArrayBuilder tailBuilder = new ConcatTailArrayBuilder(source.size());
    // The resulting trie is discarded; presumably the construction is wanted only for
    // what it feeds into tailBuilder - confirm against TailLOUDSTrie.
    new TailLOUDSTrie(source, tailBuilder);
    OutputStream out = new FileOutputStream("data/jawiki-20120220-tail");
    try {
        // Disabled: this used to take the tails from the built tail array, encode them as
        // UTF16 bytes, print the char and byte counts, and write the bytes to the stream.
    } finally {
        out.close();
    }
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) WikipediaTitles(org.trie4j.test.WikipediaTitles)

Example 3 with TailPatriciaTrie

use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.

the class SaveLOUDSTrie method main.

/**
 * Builds a {@code TailLOUDSTrie} from the Japanese Wikipedia title dump, freezes it, and dumps
 * it to files in the working directory:
 * <ul>
 *   <li>{@code louds.dat} - the whole trie via {@code writeExternal}</li>
 *   <li>{@code louds-bv.dat} - the object returned by {@code getBvTree()}</li>
 *   <li>{@code louds-labels.dat} - the labels, one {@code writeChar} per label</li>
 *   <li>{@code louds-tails.dat} and {@code louds-tailIndex.dat} - the tail array builder</li>
 *   <li>{@code louds-term.dat} - the object returned by {@code getTerm()}</li>
 * </ul>
 *
 * @param args unused
 * @throws Exception if reading the titles or writing any of the files fails
 */
public static void main(String[] args) throws Exception {
    TailPatriciaTrie trie1 = new TailPatriciaTrie();
    for (String s : new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz")) {
        trie1.insert(s);
    }
    System.out.println(trie1.size() + "nodes.");
    SBVConcatTailArrayBuilder tailArray = new SBVConcatTailArrayBuilder(trie1.size());
    TailLOUDSTrie trie = new TailLOUDSTrie(trie1, tailArray);
    System.out.println(trie.size() + "nodes.");
    trie.freeze();

    // Whole trie, using its own externalization format.
    OutputStream os = new FileOutputStream("louds.dat");
    try {
        ObjectOutputStream oos = new ObjectOutputStream(os);
        trie.writeExternal(oos);
        oos.flush();
    } finally {
        os.close();
    }

    writeObjectTo("louds-bv.dat", trie.getBvTree());

    // Labels are written as raw chars rather than as a serialized object.
    os = new FileOutputStream("louds-labels.dat");
    try {
        DataOutputStream dos = new DataOutputStream(os);
        for (char c : trie.getLabels()) {
            dos.writeChar(c);
        }
        dos.flush();
    } finally {
        os.close();
    }

    // NOTE(review): both files below receive the same tailArray builder object, so their
    // contents are identical. Presumably one was meant to hold the tails and the other the
    // tail index - confirm the intended sources before relying on these files.
    writeObjectTo("louds-tails.dat", tailArray);
    writeObjectTo("louds-tailIndex.dat", tailArray);

    writeObjectTo("louds-term.dat", trie.getTerm());
}

/**
 * Serializes {@code value} with an {@code ObjectOutputStream} into the file at {@code path},
 * closing the file even when serialization fails.
 */
private static void writeObjectTo(String path, Object value) throws java.io.IOException {
    OutputStream os = new FileOutputStream(path);
    try {
        ObjectOutputStream oos = new ObjectOutputStream(os);
        oos.writeObject(value);
        // Push the object stream's buffered data to the file before it is closed.
        oos.flush();
    } finally {
        os.close();
    }
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) FileOutputStream(java.io.FileOutputStream) SBVConcatTailArrayBuilder(org.trie4j.tail.SBVConcatTailArrayBuilder) WikipediaTitles(org.trie4j.test.WikipediaTitles) ObjectOutputStream(java.io.ObjectOutputStream)

Example 4 with TailPatriciaTrie

use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.

the class Test method main.

/**
 * Prints a banner and runs {@code go} against a freshly created {@code TailPatriciaTrie}.
 *
 * @param args unused
 * @throws Exception whatever {@code go} throws
 */
public static void main(String[] args) throws Exception {
    final String banner = "--- tail patricia trie ---";
    System.out.println(banner);
    TailPatriciaTrie target = new TailPatriciaTrie();
    go(target);
    // Disabled: the same run used to be available for MultilayerPatriciaTrie
    // ("--- multilayer patricia trie ---") and HashSetTrie ("--- hash trie ---").
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie)

Example 5 with TailPatriciaTrie

use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.

the class TestWikipedia method investigate.

/**
 * Prints diagnostics about {@code trie} to standard output:
 * <ol>
 *   <li>the first letter of each child of the root;</li>
 *   <li>the number of terminating nodes;</li>
 *   <li>counts of terminating ("leaf") and non-terminating ("node") nodes;</li>
 *   <li>a lookup pass over the Wikipedia titles, stopping at {@code maxCount} titles or at the
 *       first title that is not found, with total and lookup-only timings;</li>
 *   <li>for a {@code TailPatriciaTrie}, the length of its tails.</li>
 * </ol>
 * Finally it starts a background thread that sleeps and then queries the trie once.
 *
 * @param trie the trie to inspect
 * @throws Exception if traversal or reading the titles fails
 */
@SuppressWarnings("unused")
private static void investigate(Trie trie) throws Exception {
    System.out.println("-- dump root children.");
    for (Node n : trie.getRoot().getChildren()) {
        System.out.print(n.getLetters()[0]);
    }
    System.out.println();
    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        public boolean visit(Node node, int nest) {
            if (node.isTerminate())
                count.incrementAndGet();
            return true;
        }
    });
    System.out.println(count.intValue() + " elements.");
    System.out.println("-- list elements.");
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // NOTE(review): ln and chars are never updated by the visitor below, so the
    // "label node" and "total char count" lines always print 0.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count in trie: " + chars.intValue());
    System.out.println("verifying trie...");
    long lap = System.currentTimeMillis();
    int c = 0;
    // Accumulated time spent inside the lookups only, in milliseconds.
    int sum = 0;
    for (String word : new WikipediaTitles()) {
        if (c == maxCount)
            break;
        long d = System.currentTimeMillis();
        boolean found = Algorithms.contains(trie.getRoot(), word);
        sum += System.currentTimeMillis() - d;
        if (!found) {
            System.out.println("trie not contains [" + word + "]");
            break;
        }
        if (c % 100000 == 0) {
            System.out.println(c + " elements done.");
        }
        c++;
    }
    System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
    System.out.println("contains time: " + sum + " millis.");
    if (trie instanceof TailPatriciaTrie) {
        System.out.println("tail length: " + ((TailPatriciaTrie) trie).getTailBuilder().getTails().length());
    }
    final Trie t = trie;
    // Background thread that touches the trie once after a 100000 ms sleep; presumably this
    // keeps the trie reachable for a while (e.g. for memory inspection) - confirm.
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of silently discarding the interruption.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
}
Also used : WikipediaTitles(org.trie4j.test.WikipediaTitles) TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie)

Aggregations

TailPatriciaTrie (org.trie4j.patricia.TailPatriciaTrie)6 WikipediaTitles (org.trie4j.test.WikipediaTitles)4 FileOutputStream (java.io.FileOutputStream)2 OutputStream (java.io.OutputStream)2 Test (org.junit.Test)2 TailLOUDSTrie (org.trie4j.louds.TailLOUDSTrie)2 DataOutputStream (java.io.DataOutputStream)1 ObjectOutputStream (java.io.ObjectOutputStream)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 Trie (org.trie4j.Trie)1 BytesSuccinctBitVector (org.trie4j.bv.BytesSuccinctBitVector)1 PatriciaTrie (org.trie4j.patricia.PatriciaTrie)1 SBVConcatTailArrayBuilder (org.trie4j.tail.SBVConcatTailArrayBuilder)1 ConcatTailBuilder (org.trie4j.tail.builder.ConcatTailBuilder)1 IntArray (org.trie4j.util.IntArray)1