Use of edu.umd.cloud9.collection.trec.TrecForwardIndex in project Cloud9 by lintool.
Method testForwardIndex of class IT.
/**
 * Builds a TREC forward index and checks that it can be loaded and queried.
 *
 * <p>The method first asserts that {@code collectionPath} exists on the file
 * system, then hands a space-joined {@code hadoop jar ...} command line naming
 * {@code TrecForwardIndexBuilder} to {@code IntegrationUtils.exec}. Afterwards it
 * loads the index file {@code tmpPrefix + "-findex.dat"} together with
 * {@code mappingFile} and verifies document lookup by the integer key {@code 1}
 * and by the string key {@code "FBIS3-1"}, plus the first and last docno.
 *
 * @throws Exception propagated from any of the file system, execution, or
 *     index calls made here
 */
private void testForwardIndex() throws Exception {
    FileSystem fileSystem = FileSystem.get(IntegrationUtils.getBespinConfiguration());
    assertTrue(fileSystem.exists(collectionPath));

    String indexFile = tmpPrefix + "-findex.dat";

    // Command-line pieces, in the order they appear in the final command string.
    String[] commandParts = {
        "hadoop jar",
        IntegrationUtils.getJar("target", "cloud9"),
        edu.umd.cloud9.collection.trec.TrecForwardIndexBuilder.class.getCanonicalName(),
        "-collection=" + collectionPath,
        "-index=" + indexFile,
        "-docnoMapping=" + mappingFile
    };
    String command = Joiner.on(" ").join(commandParts);
    IntegrationUtils.exec(command);

    TrecForwardIndex forwardIndex = new TrecForwardIndex();
    forwardIndex.loadIndex(new Path(indexFile), new Path(mappingFile), fileSystem);

    // Both lookups below are expected to return content containing this text.
    String expectedSnippet = "Newspapers in the Former Yugoslav Republic";

    String contentByNumber = forwardIndex.getDocument(1).getContent();
    assertTrue(contentByNumber.contains(expectedSnippet));

    String contentByName = forwardIndex.getDocument("FBIS3-1").getContent();
    assertTrue(contentByName.contains(expectedSnippet));

    assertEquals(1, forwardIndex.getFirstDocno());
    assertEquals(472525, forwardIndex.getLastDocno());
}
Aggregations