Search in sources :

Example 6 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class TrieWriterTest method test.

/**
 * Writes a {@code TailLOUDSTrie} built from the Wikipedia titles to an in-memory
 * buffer with {@code TrieWriter}, reads it back with {@code TrieReader}, and
 * asserts that the reloaded trie contains all titles. Save/load lap times and
 * the serialized size are printed along the way.
 *
 * @throws Exception if building, writing, reading or verifying the trie fails
 */
@Test
public void test() throws Exception {
    LapTimer timer = new LapTimer();

    // Source trie holding every Wikipedia title.
    PatriciaTrie source = new PatriciaTrie();
    new WikipediaTitles().insertTo(source);

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    TrieWriter writer = new TrieWriter(buffer);

    // Convert the Patricia trie into the LOUDS representation that gets serialized.
    LOUDSPPBvTree bvTree = new LOUDSPPBvTree(source.nodeSize());
    SuffixTrieDenseTailArrayBuilder tailBuilder = new SuffixTrieDenseTailArrayBuilder();
    Trie louds = new TailLOUDSTrie(source, bvTree, tailBuilder);

    // Timed section: write + flush.
    timer.reset();
    writer.write(louds);
    writer.flush();
    timer.lapMillis("trie saved.");

    int savedSize = buffer.size();
    System.out.println(savedSize + " bytes");

    byte[] saved = buffer.toByteArray();
    TrieReader reader = new TrieReader(new ByteArrayInputStream(saved));

    // Timed section: read.
    timer.reset();
    Trie restored = reader.read();
    timer.lapMillis("trie loaded.");

    // assertAllContains returns a long that is reported here as milliseconds.
    long verifyMillis = new WikipediaTitles().assertAllContains(restored);
    System.out.println("[" + verifyMillis + "ms]: verified");
}
Also used : TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) LOUDSPPBvTree(org.trie4j.louds.bvtree.LOUDSPPBvTree) ByteArrayInputStream(java.io.ByteArrayInputStream) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) SuffixTrieDenseTailArrayBuilder(org.trie4j.tail.SuffixTrieDenseTailArrayBuilder) WikipediaTitles(org.trie4j.test.WikipediaTitles) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Trie(org.trie4j.Trie) LapTimer(org.trie4j.test.LapTimer) Test(org.junit.Test)

Example 7 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class AbstractMapTrieWikipediaSerializeTest method test.

/**
 * Serializes a map trie built from the Wikipedia titles with Java object
 * serialization, deserializes it from the resulting bytes, asserts that the
 * restored trie contains all titles, and prints one summary line with the
 * serialized size and the write/read/verify durations.
 *
 * @throws Exception if building, serializing, deserializing or verifying fails
 */
@Test
@SuppressWarnings("unchecked")
public void test() throws Exception {
    WikipediaTitles titles = new WikipediaTitles();
    MapTrie<Integer> firstTrie = titles.insertTo(newTrie());
    MapTrie<Integer> trie = buildSecondTrie(firstTrie);

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(buffer);

    // Timed section: writeObject + flush.
    LapTimer timer = new LapTimer();
    out.writeObject(trie);
    out.flush();
    long writeMillis = timer.lapMillis();

    byte[] serialized = buffer.toByteArray();

    // Timed section: stream construction + readObject.
    timer.reset();
    ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(serialized));
    MapTrie<Integer> restored = (MapTrie<Integer>) in.readObject();
    long readMillis = timer.lapMillis();

    long verifyMillis = titles.assertAllContains(restored);

    String report = String.format(
            "%s%s%s, size: %d, write(ms): %d, read(ms): %d, verify(ms): %d.",
            trie.getClass().getSimpleName(),
            getBvTreeClassName(trie),
            getTailClassName(trie),
            serialized.length,
            writeMillis,
            readMillis,
            verifyMillis);
    System.out.println(report);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) WikipediaTitles(org.trie4j.test.WikipediaTitles) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) LapTimer(org.trie4j.test.LapTimer) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test)

Example 8 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class TailPatriciaTrieWithSuffixTrieTailBuilderTest method investigate.

/**
 * Inserts the Wikipedia titles whose iteration index lies in
 * {@code [start, end)} into a {@code TailPatriciaTrie} backed by a
 * {@code SuffixTrieTailBuilder}, printing each inserted title, then iterates
 * the titles again and asserts that the trie contains each of those words.
 *
 * @throws Exception if iterating the titles or accessing the trie fails
 */
public void investigate() throws Exception {
    final int start = 0;
    final int end = 5;
    Trie t = new TailPatriciaTrie(new SuffixTrieTailBuilder());

    // Pass 1: insert (and echo) the words inside the window.
    int insertIndex = 0;
    for (String title : new WikipediaTitles()) {
        if (insertIndex >= end) {
            break;
        }
        if (insertIndex >= start) {
            t.insert(title);
            System.out.println(title);
        }
        insertIndex++;
    }

    // Pass 2: every word inside the window must now be found.
    int checkIndex = 0;
    for (String title : new WikipediaTitles()) {
        if (checkIndex >= end) {
            break;
        }
        if (checkIndex >= start) {
            Assert.assertTrue(checkIndex + "th word: " + title, t.contains(title));
        }
        checkIndex++;
    }
}
Also used : WikipediaTitles(org.trie4j.test.WikipediaTitles) Trie(org.trie4j.Trie) SuffixTrieTailBuilder(org.trie4j.tail.builder.SuffixTrieTailBuilder)

Example 9 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class SBVConcatTailArrayTest method test.

/**
 * Builds two {@code TailLOUDSTrie}s from the same {@code TailPatriciaTrie}, one
 * with {@code SBVConcatTailArrayAppendingBuilder} and one with
 * {@code SBVConcatTailArrayBuilder}, and compares the tail-index bit vectors
 * of both: every bit, the count cache and the index cache.
 *
 * @throws Exception if building or verifying the tries fails
 */
@Test
public void test() throws Exception {
    // Check whether the bit vector and caches differ between using
    // SBVConcatTailArrayIndexBuilder in the ordinary way and using a
    // TailArrayIndexBuilder that appends on every add.
    TailPatriciaTrie org = new TailPatriciaTrie(new ConcatTailBuilder());
    new WikipediaTitles().insertTo(org);
    TailLOUDSTrie louds1 = new TailLOUDSTrie(org, new SBVConcatTailArrayAppendingBuilder());
    new WikipediaTitles().assertAllContains(louds1);
    BytesSuccinctBitVector sbv1 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds1.getTailArray()).getTailIndex()).getSbv();
    TailLOUDSTrie louds2 = new TailLOUDSTrie(org, new SBVConcatTailArrayBuilder());
    new WikipediaTitles().assertAllContains(louds2);
    BytesSuccinctBitVector sbv2 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds2.getTailArray()).getTailIndex()).getSbv();
    {
        // Both bit vectors must have the same size and identical bits.
        int n = sbv1.size();
        System.out.println("sbv size: " + n);
        Assert.assertEquals(n, sbv2.size());
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th bit", sbv1.get(i), sbv2.get(i));
        }
    }
    {
        int[] countCache1 = sbv1.getCountCache0();
        int[] countCache2 = sbv2.getCountCache0();
        int n = countCache1.length;
        System.out.println("countCache0 size should be: " + (sbv1.size() / 64 + 1));
        System.out.println("countCache0 size: " + n);
        // The array lengths are deliberately not asserted equal (that check was
        // disabled in the original test); only the common prefix is compared.
        n = Math.min(countCache1.length, countCache2.length);
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th count cache.", countCache1[i], countCache2[i]);
        }
    }
    {
        IntArray indexCache1 = sbv1.getIndexCache0();
        IntArray indexCache2 = sbv2.getIndexCache0();
        int n = indexCache1.size();
        System.out.println("indexCache0 size1: " + n);
        System.out.println("indexCache0 size2: " + indexCache2.size());
        // As above, sizes are not asserted equal; only the common prefix is compared.
        n = Math.min(indexCache1.size(), indexCache2.size());
        // Print up to the first 10 entries of each cache, bounded by the
        // cache's own size so a short cache is never read past its end.
        int preview1 = Math.min(10, indexCache1.size());
        for (int i = 0; i < preview1; i++) {
            System.out.print(indexCache1.get(i) + ", ");
        }
        System.out.println();
        int preview2 = Math.min(10, indexCache2.size());
        for (int i = 0; i < preview2; i++) {
            System.out.print(indexCache2.get(i) + ", ");
        }
        System.out.println();
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th index cache.", indexCache1.get(i), indexCache2.get(i));
        }
    }
}
Also used : TailPatriciaTrie(org.trie4j.patricia.TailPatriciaTrie) TailLOUDSTrie(org.trie4j.louds.TailLOUDSTrie) IntArray(org.trie4j.util.IntArray) ConcatTailBuilder(org.trie4j.tail.builder.ConcatTailBuilder) BytesSuccinctBitVector(org.trie4j.bv.BytesSuccinctBitVector) WikipediaTitles(org.trie4j.test.WikipediaTitles) Test(org.junit.Test)

Example 10 with WikipediaTitles

use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.

the class BitVectorExp method main.

/**
 * Experiment: builds a {@code PatriciaTrie} from up to {@code maxCount}
 * Wikipedia titles, walks it with {@code Algorithms.traverseByDepth} while
 * appending one {@code true} bit per child and one {@code false} bit per node
 * to a {@code BytesSuccinctBitVector}, then calls {@code rank}/{@code select}
 * on that bit vector and prints lap times for each phase.
 *
 * <p>The title archive can be downloaded from
 * http://dumps.wikimedia.org/jawiki/latest/
 *
 * @param args command-line arguments (unused)
 * @throws Exception if reading the titles fails or the final sleep is interrupted
 */
public static void main(String[] args) throws Exception {
    Trie trie = new PatriciaTrie();
    int c = 0;
    // You can download archive from http://dumps.wikimedia.org/jawiki/latest/
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        trie.insert(word);
        c++;
        if (c == maxCount)
            break;
    }
    t.lapMillis("trie building done. %d words.", c);
    final BytesSuccinctBitVector bv = new BytesSuccinctBitVector(5000000);
    final AtomicInteger nodeCount = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {

        @Override
        public boolean visit(Node node, int nest) {
            Node[] children = node.getChildren();
            if (children != null) {
                // Reuse the array fetched above instead of calling getChildren() again.
                int n = children.length;
                for (int i = 0; i < n; i++) {
                    bv.append(true);
                }
            }
            // One terminating 0 bit per visited node.
            bv.append(false);
            nodeCount.incrementAndGet();
            return true;
        }
    });
    // Drop the reference to the trie; presumably so it can be collected before
    // the rank/select phase -- TODO confirm intent.
    trie = null;
    t.lapMillis("done. %d nodes inserted. do rank and select", nodeCount.intValue());
    // Exercise rank and select at every 100th position; results are discarded.
    for (int i = 0; i < c; i += 100) {
        int count = bv.rank(i, true);
        bv.select(count, true);
    }
    t.lapMillis("done.");
    // NOTE(review): the 10 s pause looks like it keeps the JVM alive for
    // external inspection (e.g. a profiler) -- confirm before removing.
    Thread.sleep(10000);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PatriciaTrie(org.trie4j.patricia.PatriciaTrie) Node(org.trie4j.Node) BytesSuccinctBitVector(org.trie4j.bv.BytesSuccinctBitVector) WikipediaTitles(org.trie4j.test.WikipediaTitles) Trie(org.trie4j.Trie) LapTimer(org.trie4j.test.LapTimer) NodeVisitor(org.trie4j.NodeVisitor)

Aggregations

WikipediaTitles (org.trie4j.test.WikipediaTitles) 17, LapTimer (org.trie4j.test.LapTimer) 12, Test (org.junit.Test) 8, TailPatriciaTrie (org.trie4j.patricia.TailPatriciaTrie) 7, PrintWriter (java.io.PrintWriter) 5, Trie (org.trie4j.Trie) 5, ByteArrayInputStream (java.io.ByteArrayInputStream) 4, ByteArrayOutputStream (java.io.ByteArrayOutputStream) 4, ObjectOutputStream (java.io.ObjectOutputStream) 4, TailLOUDSTrie (org.trie4j.louds.TailLOUDSTrie) 4, FileOutputStream (java.io.FileOutputStream) 3, ObjectInputStream (java.io.ObjectInputStream) 3, OutputStream (java.io.OutputStream) 3, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 3, PatriciaTrie (org.trie4j.patricia.PatriciaTrie) 3, Node (org.trie4j.Node) 2, NodeVisitor (org.trie4j.NodeVisitor) 2, BytesSuccinctBitVector (org.trie4j.bv.BytesSuccinctBitVector) 2, ConcatTailBuilder (org.trie4j.tail.builder.ConcatTailBuilder) 2, DataOutputStream (java.io.DataOutputStream) 1