use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.
the class SBVConcatTailArrayTest method test.
/**
 * Builds two {@code TailLOUDSTrie}s from the same {@code TailPatriciaTrie}, one with
 * {@code SBVConcatTailArrayAppendingBuilder} and one with {@code SBVConcatTailArrayBuilder},
 * and asserts that the succinct bit vectors behind their tail indexes agree bit by bit
 * and that their count/index caches agree over the common prefix.
 *
 * @throws Exception if loading the Wikipedia titles or building a trie fails
 */
@Test
public void test() throws Exception {
    // Check whether the bit vector and caches differ between using the regular
    // SBV concat tail array builder and using a builder that appends on every add.
    TailPatriciaTrie org = new TailPatriciaTrie(new ConcatTailBuilder());
    new WikipediaTitles().insertTo(org);

    TailLOUDSTrie louds1 = new TailLOUDSTrie(org, new SBVConcatTailArrayAppendingBuilder());
    new WikipediaTitles().assertAllContains(louds1);
    BytesSuccinctBitVector sbv1 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds1.getTailArray()).getTailIndex()).getSbv();

    TailLOUDSTrie louds2 = new TailLOUDSTrie(org, new SBVConcatTailArrayBuilder());
    new WikipediaTitles().assertAllContains(louds2);
    BytesSuccinctBitVector sbv2 = (BytesSuccinctBitVector) ((SBVTailIndex) ((DefaultTailArray) louds2.getTailArray()).getTailIndex()).getSbv();

    // 1) Both bit vectors must have the same size and identical bits.
    {
        int n = sbv1.size();
        System.out.println("sbv size: " + n);
        Assert.assertEquals(n, sbv2.size());
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th bit", sbv1.get(i), sbv2.get(i));
        }
    }

    // 2) The count caches must agree over their common prefix. The length equality
    //    assertion is disabled, so only the shorter length is compared.
    {
        int[] countCache1 = sbv1.getCountCache0();
        int[] countCache2 = sbv2.getCountCache0();
        int n = countCache1.length;
        System.out.println("countCache0 size should be: " + (sbv1.size() / 64 + 1));
        System.out.println("countCache0 size: " + n);
        // Assert.assertEquals(n, countCache2.length);
        n = Math.min(countCache1.length, countCache2.length);
        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th count cache.", countCache1[i], countCache2[i]);
        }
    }

    // 3) The index caches must agree over their common prefix (lengths are likewise
    //    not asserted).
    {
        IntArray indexCache1 = sbv1.getIndexCache0();
        IntArray indexCache2 = sbv2.getIndexCache0();
        int n = indexCache1.size();
        System.out.println("indexCache0 size1: " + n);
        System.out.println("indexCache0 size2: " + indexCache2.size());
        // Assert.assertEquals(n, indexCache2.size());
        n = Math.min(indexCache1.size(), indexCache2.size());

        // Print a short preview of each cache. The preview is capped at the number of
        // entries both caches actually hold, so small inputs never read past the end.
        int preview = Math.min(10, n);
        for (int i = 0; i < preview; i++) {
            System.out.print(indexCache1.get(i) + ", ");
        }
        System.out.println();
        for (int i = 0; i < preview; i++) {
            System.out.print(indexCache2.get(i) + ", ");
        }
        System.out.println();

        for (int i = 0; i < n; i++) {
            Assert.assertEquals(i + "th index cache.", indexCache1.get(i), indexCache2.get(i));
        }
    }
}
use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.
the class CreateTail method main.
/**
 * Loads the titles from {@code data/jawiki-20120220-all-titles-in-ns0.gz} into a
 * {@code TailPatriciaTrie}, builds a {@code TailLOUDSTrie} over it with a
 * {@code ConcatTailArrayBuilder}, and opens (then closes) the output file
 * {@code data/jawiki-20120220-tail}. The code that would write the tails into that
 * file is currently disabled, so nothing is written to it.
 *
 * @param args unused
 * @throws Exception if reading the titles or opening/closing the output file fails
 */
public static void main(String[] args) throws Exception {
    TailPatriciaTrie source = new TailPatriciaTrie();
    for (String title : new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz")) {
        source.insert(title);
    }

    // The LOUDS trie itself is discarded; it is constructed with the tail builder
    // passed in as its second argument.
    ConcatTailArrayBuilder tailBuilder = new ConcatTailArrayBuilder(source.size());
    new TailLOUDSTrie(source, tailBuilder);

    OutputStream out = new FileOutputStream("data/jawiki-20120220-tail");
    try {
        // Disabled: dump the concatenated tails as UTF-16 bytes.
        //   CharSequence seq = ta.build().getTails();
        //   byte[] bytes = seq.toString().getBytes("UTF16");
        //   System.out.println(seq.length() + "chars.");
        //   System.out.println(bytes.length + "bytes.");
        //   os.write(bytes);
    } finally {
        out.close();
    }
}
use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.
the class SaveLOUDSTrie method main.
/**
 * Loads the titles from {@code data/jawiki-20120220-all-titles-in-ns0.gz} into a
 * {@code TailPatriciaTrie}, converts it to a frozen {@code TailLOUDSTrie}, and writes
 * the trie and several of its parts to separate files in the working directory:
 * {@code louds.dat}, {@code louds-bv.dat}, {@code louds-labels.dat},
 * {@code louds-tails.dat}, {@code louds-tailIndex.dat} and {@code louds-term.dat}.
 *
 * @param args unused
 * @throws Exception if reading the titles or writing any of the files fails
 */
public static void main(String[] args) throws Exception {
    TailPatriciaTrie trie1 = new TailPatriciaTrie();
    for (String s : new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz")) {
        trie1.insert(s);
    }
    System.out.println(trie1.size() + "nodes.");

    SBVConcatTailArrayBuilder tailArray = new SBVConcatTailArrayBuilder(trie1.size());
    TailLOUDSTrie trie = new TailLOUDSTrie(trie1, tailArray);
    System.out.println(trie.size() + "nodes.");
    trie.freeze();

    // The whole trie, written through its own writeExternal implementation.
    OutputStream os = new FileOutputStream("louds.dat");
    try {
        ObjectOutputStream oos = new ObjectOutputStream(os);
        trie.writeExternal(oos);
        oos.flush();
    } finally {
        os.close();
    }

    writeObjectTo("louds-bv.dat", trie.getBvTree());

    // Labels are written as raw chars rather than as a serialized object.
    os = new FileOutputStream("louds-labels.dat");
    try {
        DataOutputStream dos = new DataOutputStream(os);
        for (char c : trie.getLabels()) {
            dos.writeChar(c);
        }
        dos.flush();
    } finally {
        os.close();
    }

    // NOTE(review): the same tailArray object is written to both louds-tails.dat and
    // louds-tailIndex.dat, so the two files receive identical content. This matches
    // the previous behavior; confirm whether distinct objects were intended.
    writeObjectTo("louds-tails.dat", tailArray);
    writeObjectTo("louds-tailIndex.dat", tailArray);

    writeObjectTo("louds-term.dat", trie.getTerm());
}

/**
 * Serializes {@code value} with an {@code ObjectOutputStream} into the file at
 * {@code path}, flushing before the underlying file stream is closed.
 *
 * @param path  file to create or overwrite
 * @param value object to serialize
 * @throws Exception if the file cannot be opened or the object cannot be written
 */
private static void writeObjectTo(String path, Object value) throws Exception {
    OutputStream os = new FileOutputStream(path);
    try {
        ObjectOutputStream oos = new ObjectOutputStream(os);
        oos.writeObject(value);
        oos.flush();
    } finally {
        os.close();
    }
}
use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.
the class Test method main.
/**
 * Prints a banner and runs {@code go} against a newly created {@code TailPatriciaTrie}.
 * Runs for other trie implementations are present but disabled.
 *
 * @param args unused
 * @throws Exception propagated from {@code go}
 */
public static void main(String[] args) throws Exception {
    System.out.println("--- tail patricia trie ---");
    TailPatriciaTrie target = new TailPatriciaTrie();
    go(target);

    // Disabled runs for other implementations:
    //   System.out.println("--- multilayer patricia trie ---");
    //   go(new MultilayerPatriciaTrie());
    //   System.out.println("--- hash trie ---");
    //   go(new HashSetTrie());
}
use of org.trie4j.patricia.TailPatriciaTrie in project trie4j by takawitter.
the class TestWikipedia method investigate.
/**
 * Prints diagnostic information about {@code trie}: the first letter of each child of
 * the root, the number of terminating nodes, node/leaf counts, and the time taken to
 * look up words from {@code WikipediaTitles} (stopping after {@code maxCount} words or
 * at the first word that is not found). Finally starts a background thread that sleeps
 * and then queries the trie once.
 *
 * @param trie the trie to inspect
 * @throws Exception if traversal or reading the titles fails
 */
@SuppressWarnings("unused")
private static void investigate(Trie trie) throws Exception {
    System.out.println("-- dump root children.");
    for (Node n : trie.getRoot().getChildren()) {
        System.out.print(n.getLetters()[0]);
    }
    System.out.println();

    System.out.println("-- count elements.");
    final AtomicInteger count = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
        public boolean visit(Node node, int nest) {
            if (node.isTerminate())
                count.incrementAndGet();
            return true;
        }
    });
    System.out.println(count.intValue() + " elements.");

    //*
    System.out.println("-- list elements.");
    // n counts non-terminating nodes, l counts terminating nodes.
    final AtomicInteger n = new AtomicInteger();
    final AtomicInteger l = new AtomicInteger();
    // NOTE(review): ln and chars are never updated by the visitor below, so the
    // "label node" and "total char count" lines always print 0.
    final AtomicInteger ln = new AtomicInteger();
    final AtomicInteger chars = new AtomicInteger();
    Algorithms.traverseByDepth(trie.getRoot(), new NodeVisitor() {
        public boolean visit(Node node, int nest) {
            if (node.isTerminate()) {
                l.incrementAndGet();
            } else {
                n.incrementAndGet();
            }
            return true;
        }
    });
    System.out.println("node: " + n.intValue());
    System.out.println("leaf: " + l.intValue());
    System.out.println("label node: " + ln.intValue());
    System.out.println("total char count in trie: " + chars.intValue());

    System.out.println("verifying trie...");
    long lap = System.currentTimeMillis();
    int c = 0;
    // Accumulated time spent inside the contains call only, in milliseconds.
    int sum = 0;
    for (String word : new WikipediaTitles()) {
        if (c == maxCount)
            break;
        long d = System.currentTimeMillis();
        //trie.contains(word);
        boolean found = Algorithms.contains(trie.getRoot(), word);
        sum += System.currentTimeMillis() - d;
        if (!found) {
            System.out.println("trie not contains [" + word + "]");
            break;
        }
        if (c % 100000 == 0) {
            System.out.println(c + " elements done.");
        }
        c++;
    }
    System.out.println("done in " + (System.currentTimeMillis() - lap) + " millis.");
    System.out.println("contains time: " + sum + " millis.");
    // System.out.println(trie.getRoot().getChildren().length + "children in root");
    if (trie instanceof TailPatriciaTrie) {
        // ((TailPatriciaTrie) trie).pack();
        System.out.println("tail length: " + ((TailPatriciaTrie) trie).getTailBuilder().getTails().length());
    }

    // Background thread that sleeps 100 seconds and then touches the trie once.
    // NOTE(review): presumably this keeps the trie reachable for a while after this
    // method returns (e.g. for memory inspection) - confirm.
    final Trie t = trie;
    new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                Thread.sleep(100000);
                t.contains("hello");
            } catch (InterruptedException e) {
                // Restore the interrupt status instead of silently discarding it.
                Thread.currentThread().interrupt();
            }
        }
    }).start();
    //*/
}
Aggregations