use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class CreateTail method main.
/**
 * Inserts every title from {@code data/jawiki-20120220-all-titles-in-ns0.gz} into a
 * {@link TailPatriciaTrie}, constructs a {@link TailLOUDSTrie} from it together with a
 * {@link ConcatTailArrayBuilder}, and then opens and closes
 * {@code data/jawiki-20120220-tail}.
 *
 * <p>The code that would write the tail characters is commented out, so the output
 * file is created (or truncated) but nothing is written to it.
 *
 * @param args not used
 * @throws Exception anything raised while reading titles, building the tries, or
 *         opening/closing the output file
 */
public static void main(String[] args) throws Exception {
    final TailPatriciaTrie patricia = new TailPatriciaTrie();
    final WikipediaTitles titles = new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz");
    for (String title : titles) {
        patricia.insert(title);
    }

    final ConcatTailArrayBuilder tailBuilder = new ConcatTailArrayBuilder(patricia.size());
    // The constructed trie itself is discarded; presumably the call is made for its
    // effect on tailBuilder -- confirm against TailLOUDSTrie.
    new TailLOUDSTrie(patricia, tailBuilder);

    final OutputStream out = new FileOutputStream("data/jawiki-20120220-tail");
    try {
        // Disabled tail dump, kept for reference:
        //   CharSequence seq = tailBuilder.build().getTails();
        //   byte[] bytes = seq.toString().getBytes("UTF16");
        //   System.out.println(seq.length() + "chars.");
        //   System.out.println(bytes.length + "bytes.");
        //   out.write(bytes);
    } finally {
        out.close();
    }
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class AbstractMapTrieWikipediaTest method test.
/**
 * Builds a map trie from the Wikipedia titles file, derives a second trie from it via
 * {@code buildSecondTrie}, and verifies that the second trie maps every title to the
 * ordinal (0-based) under which it was inserted.
 *
 * <p>Timing for insertion and lookup is accumulated in nanoseconds and reported in
 * milliseconds. Verification stops at the first mismatch and prints the offending
 * word together with the expected and actual ids.
 *
 * @throws Exception any failure raised while inserting (rethrown after dumping the
 *         trie), building the second trie, or reading the titles
 */
@Test
public void test() throws Exception {
    MapTrie<Integer> trie = createFirstTrie();
    System.out.println("building first trie: " + trie.getClass().getName());
    int c = 0, chars = 0;
    long b = 0;
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles(wikipediaFilename)) {
        try {
            t.reset();
            trie.insert(word, c);
            b += t.lapNanos();
        } catch (Exception e) {
            System.out.println("exception at " + c + "th word: " + word);
            // PrintWriter(OutputStream) does not auto-flush; flush explicitly so the
            // diagnostic dump is not lost when the exception propagates.
            PrintWriter dumpWriter = new PrintWriter(System.out);
            trie.dump(dumpWriter);
            dumpWriter.flush();
            throw e;
        }
        c++;
        chars += word.length();
    }
    System.out.println(String.format("done in %d millis with %d words and %d chars.", (b / 1000000), c, chars));

    // Only announce and time the conversion when the concrete test class declares its
    // own buildSecondTrie; either way the conversion runs exactly once.
    boolean declaresSecondTrieBuilder;
    try {
        getClass().getDeclaredMethod("buildSecondTrie", Trie.class);
        declaresSecondTrieBuilder = true;
    } catch (NoSuchMethodException ignored) {
        // Expected when the subclass does not declare the method: build silently.
        declaresSecondTrieBuilder = false;
    }
    MapTrie<Integer> second;
    if (declaresSecondTrieBuilder) {
        System.out.print("building second trie: ");
        t.reset();
        second = buildSecondTrie(trie);
        System.out.println("done in " + t.lapMillis() + "millis.");
    } else {
        second = buildSecondTrie(trie);
    }

    System.out.println("verifying trie.");
    long sum = 0;
    c = 0;
    for (String word : new WikipediaTitles(wikipediaFilename)) {
        // Remember the id this word was inserted with before c is advanced.
        int expected = c;
        t.reset();
        Integer actual = second.get(word);
        sum += t.lapNanos();
        c++;
        // A missing key yields null; treat it as a mismatch instead of unboxing it.
        if (actual == null || actual.intValue() != expected) {
            System.out.println(String.format("verification failed. trie not contains %d th word: [%s]" + " with id: [%d] actual: [%d].", c, word, expected, actual));
            break;
        }
    }
    System.out.println("done in " + (sum / 1000000) + " millis with " + c + " words.");
    afterVerification(second);
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class SaveLOUDSTrie method main.
/**
 * Builds a {@link TailLOUDSTrie} from the titles in
 * {@code data/jawiki-20120220-all-titles-in-ns0.gz}, freezes it, and writes the trie
 * and several of its parts to separate files in the working directory:
 * {@code louds.dat}, {@code louds-bv.dat}, {@code louds-labels.dat},
 * {@code louds-tails.dat}, {@code louds-tailIndex.dat} and {@code louds-term.dat}.
 *
 * @param args not used
 * @throws Exception anything raised while reading titles, building the tries, or
 *         writing the output files
 */
public static void main(String[] args) throws Exception {
    final TailPatriciaTrie patricia = new TailPatriciaTrie();
    for (String title : new WikipediaTitles("data/jawiki-20120220-all-titles-in-ns0.gz")) {
        patricia.insert(title);
    }
    System.out.println(patricia.size() + "nodes.");

    final SBVConcatTailArrayBuilder tails = new SBVConcatTailArrayBuilder(patricia.size());
    final TailLOUDSTrie louds = new TailLOUDSTrie(patricia, tails);
    System.out.println(louds.size() + "nodes.");
    louds.freeze();

    // Whole trie, via its own writeExternal.
    final OutputStream trieFile = new FileOutputStream("louds.dat");
    try {
        final ObjectOutputStream trieOut = new ObjectOutputStream(trieFile);
        louds.writeExternal(trieOut);
        trieOut.flush();
    } finally {
        trieFile.close();
    }

    // Bit-vector tree.
    final OutputStream bvFile = new FileOutputStream("louds-bv.dat");
    try {
        final ObjectOutputStream bvOut = new ObjectOutputStream(bvFile);
        bvOut.writeObject(louds.getBvTree());
        bvOut.flush();
    } finally {
        bvFile.close();
    }

    // Labels, one writeChar per label.
    final OutputStream labelsFile = new FileOutputStream("louds-labels.dat");
    try {
        final DataOutputStream labelsOut = new DataOutputStream(labelsFile);
        for (char label : louds.getLabels()) {
            labelsOut.writeChar(label);
        }
        labelsOut.flush();
    } finally {
        labelsFile.close();
    }

    // Tail array builder object.
    final OutputStream tailsFile = new FileOutputStream("louds-tails.dat");
    try {
        final ObjectOutputStream tailsOut = new ObjectOutputStream(tailsFile);
        tailsOut.writeObject(tails);
        tailsOut.flush();
    } finally {
        tailsFile.close();
    }

    // NOTE(review): this writes the very same object as louds-tails.dat above;
    // the file name suggests only an index was intended -- confirm.
    final OutputStream tailIndexFile = new FileOutputStream("louds-tailIndex.dat");
    try {
        final ObjectOutputStream tailIndexOut = new ObjectOutputStream(tailIndexFile);
        tailIndexOut.writeObject(tails);
        tailIndexOut.flush();
    } finally {
        tailIndexFile.close();
    }

    // Term data as returned by getTerm().
    final OutputStream termFile = new FileOutputStream("louds-term.dat");
    try {
        final ObjectOutputStream termOut = new ObjectOutputStream(termFile);
        termOut.writeObject(louds.getTerm());
        termOut.flush();
    } finally {
        termFile.close();
    }
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class TestIO method verify.
/**
 * Checks that {@code da} contains every word yielded by {@link WikipediaTitles},
 * stopping after {@code maxCount} words or at the first word that is not found.
 *
 * <p>Prints the number of words visited, the wall-clock time of the whole loop, and
 * the accumulated time spent inside {@code contains}. The latter is summed in
 * nanoseconds and converted once at the end: a single lookup typically takes far less
 * than a millisecond, so summing per-call millisecond readings would truncate nearly
 * every sample to zero.
 *
 * @param da the trie to verify
 * @throws Exception anything raised while reading the titles or querying the trie
 */
private static void verify(Trie da) throws Exception {
    System.out.println("verifying double array...");
    int c = 0;
    long containsNanos = 0;
    LapTimer t1 = new LapTimer();
    LapTimer t = new LapTimer();
    for (String word : new WikipediaTitles()) {
        if (c == maxCount) {
            break;
        }
        t.reset();
        boolean found = da.contains(word);
        containsNanos += t.lapNanos();
        c++;
        if (!found) {
            System.out.println("verification failed. trie not contains " + c + " th word: [" + word + "]");
            break;
        }
    }
    System.out.println("done " + c + "words in " + t1.lapMillis() + " millis.");
    System.out.println("contains time: " + (containsNanos / 1000000) + " millis.");
}
use of org.trie4j.test.WikipediaTitles in project trie4j by takawitter.
the class AbstractWikipediaSerializeTest method test.
/**
 * Round-trips a trie through Java serialization and reports size and timings.
 *
 * <p>The trie under test is obtained by passing {@code firstTrie()} through
 * {@code WikipediaTitles.insertTo} and then {@code secondTrie}. It is written to an
 * in-memory buffer, read back, and the restored instance is handed to
 * {@code WikipediaTitles.assertAllContains} (presumably checking that every title is
 * present and returning the elapsed milliseconds -- confirm against that class).
 *
 * @throws Exception anything raised while building, serializing, deserializing or
 *         verifying the trie
 */
@Test
public void test() throws Exception {
    final WikipediaTitles titles = new WikipediaTitles();
    final Trie populated = titles.insertTo(firstTrie());
    final Trie trie = secondTrie(populated);

    // Serialize into memory; the timer is created after the stream so that only
    // writeObject + flush are measured.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final ObjectOutputStream objectOut = new ObjectOutputStream(buffer);
    final LapTimer timer = new LapTimer();
    objectOut.writeObject(trie);
    objectOut.flush();
    final long writeMillis = timer.lapMillis();

    final byte[] serialized = buffer.toByteArray();

    // Deserialize; the measurement covers stream construction and readObject.
    timer.reset();
    final ObjectInputStream objectIn = new ObjectInputStream(new ByteArrayInputStream(serialized));
    final Trie restored = (Trie) objectIn.readObject();
    final long readMillis = timer.lapMillis();

    final long verifyMillis = titles.assertAllContains(restored);

    final String report = String.format(
            "%s%s%s, size: %d, write(ms): %d, read(ms): %d, verify(ms): %d.",
            trie.getClass().getSimpleName(),
            getBvTreeClassName(trie),
            getTailClassName(trie),
            serialized.length,
            writeMillis,
            readMillis,
            verifyMillis);
    System.out.println(report);
}
Aggregations