Use of tl.lin.data.array.ArrayListWritable in project Cloud9 by lintool.
Class BooleanRetrieval, method fetchPostings.
/**
 * Looks up {@code term} in {@code index} and returns the right-hand element of
 * the value pair populated by that lookup.
 *
 * <p>NOTE(review): the right element is presumably the postings list for the
 * term; when the term is absent the pair is left as freshly constructed, so
 * callers should be prepared for whatever {@code getRightElement()} yields on an
 * unpopulated pair -- confirm against {@code PairOfWritables}.
 *
 * @param term the term to look up
 * @return the right element of the looked-up value pair
 * @throws IOException if reading from the index fails
 */
private ArrayListWritable<PairOfInts> fetchPostings(String term) throws IOException {
  final Text lookupKey = new Text(term);
  final PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> entry =
      new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

  // The lookup fills 'entry' in place; its return value is not needed here.
  index.get(lookupKey, entry);
  return entry.getRightElement();
}
Use of tl.lin.data.array.ArrayListWritable in project Cloud9 by lintool.
Class InvertedIndexingIT, method testInvertedIndexing.
/**
 * Runs {@code BuildInvertedIndex} over the test collection through the local
 * Hadoop launcher, then opens the resulting {@code part-r-00000} MapFile and
 * spot-checks the entry stored for the term "gold".
 *
 * @throws Exception if the job cannot be launched or the index cannot be read
 */
@Test
public void testInvertedIndexing() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(collectionPath));

  String[] args = new String[] {
      "hadoop --config src/test/resources/hadoop-local-conf/ jar",
      IntegrationUtils.getJar("target", "cloud9"),
      edu.umd.cloud9.example.ir.BuildInvertedIndex.class.getCanonicalName(),
      "-input", collectionPath.toString(),
      "-output", tmpPrefix,
      "-numReducers", "1" };
  IntegrationUtils.exec(Joiner.on(" ").join(args));

  MapFile.Reader reader = new MapFile.Reader(new Path(tmpPrefix + "/part-r-00000"), conf);
  try {
    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value =
        new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();
    key.set("gold");

    // A null result means the key is missing; fail with a clear message instead
    // of hitting a NullPointerException on the unpopulated pair below.
    assertTrue("term \"gold\" not found in index", reader.get(key, value) != null);

    // Left element of the pair is checked against the expected count of 584.
    assertEquals(584, value.getLeftElement().get());

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    assertEquals(5303, postings.get(0).getLeftElement());
    assertEquals(684030, postings.get(100).getLeftElement());
    assertEquals(1634312, postings.get(200).getLeftElement());
  } finally {
    // Close the reader even when an assertion above fails.
    reader.close();
  }
}
Use of tl.lin.data.array.ArrayListWritable in project Cloud9 by lintool.
Class ClueWeb09EN01WebgraphIT, method verifyWebGraph.
/**
 * Reads selected records from two part files of the web-graph output
 * ({@code part-00000} and {@code part-00010}) and checks their URLs and links
 * against the expected maps.
 *
 * <p>Records are consumed strictly in file order; the keys named in the inline
 * comments (200, 400, 600 / 10, 210, 410, 610) are the keys the records are
 * expected to carry and are not themselves asserted here.
 *
 * @throws Exception if a part file cannot be read or a verification fails
 */
private void verifyWebGraph() throws Exception {
  Configuration conf = IntegrationUtils.getBespinConfiguration();
  FileSystem fs = FileSystem.get(conf);
  IntWritable key = new IntWritable();
  ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>();

  SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(new Path(collectionOutput + "/" + DriverUtil.OUTPUT_WEBGRAPH + "/part-00000")));
  try {
    //read key 200
    reader.next(key, value);
    verifyURLs(200, urlMap, value);
    verifyLinks(200, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, internalLinkMap, value);
    //skip key 400
    reader.next(key, value);
    //read key 600
    reader.next(key, value);
    verifyURLs(600, urlMap, value);
    verifyLinks(600, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, internalLinkMap, value);
    verifyLinks(600, AnchorTextConstants.Type.EXTERNAL_OUT_LINK.val, externalLinkMap, value);
  } finally {
    // Release the file handle even if one of the verifications throws.
    reader.close();
  }

  reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(new Path(collectionOutput + "/" + DriverUtil.OUTPUT_WEBGRAPH + "/part-00010")));
  try {
    //read key 10
    reader.next(key, value);
    verifyURLs(10, urlMap, value);
    verifyLinks(10, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, internalLinkMap, value);
    //skip key 210
    reader.next(key, value);
    //skip key 410
    reader.next(key, value);
    //read key 610
    reader.next(key, value);
    verifyURLs(610, urlMap, value);
    verifyLinks(610, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, internalLinkMap, value);
  } finally {
    // Release the file handle even if one of the verifications throws.
    reader.close();
  }
}
Use of tl.lin.data.array.ArrayListWritable in project Cloud9 by lintool.
Class Gov2WebgraphIT, method verifyAnchors.
/**
 * Checks the second record of two part files of the weighted reverse web-graph
 * output ({@code part-00000} and {@code part-00010}) against the expected anchor
 * weights and sources.
 *
 * @throws Exception if a part file cannot be read or a verification fails
 */
private void verifyAnchors() throws Exception {
  Configuration conf = IntegrationUtils.getBespinConfiguration();
  FileSystem fs = FileSystem.get(conf);
  IntWritable key = new IntWritable();
  ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>();

  SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(new Path(collectionOutput + "/" + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/part-00000")));
  try {
    // Two consecutive reads: the first record is skipped, the second is verified.
    reader.next(key, value);
    reader.next(key, value);
    verifyWeights(anchorList1, value);
    verifySources(anchorSources1, value);
  } finally {
    // Release the file handle even if one of the verifications throws.
    reader.close();
  }

  reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(new Path(collectionOutput + "/" + DriverUtil.OUTPUT_WEGIHTED_REVERSE_WEBGRAPH + "/part-00010")));
  try {
    // Same pattern: skip the first record, verify the second.
    reader.next(key, value);
    reader.next(key, value);
    verifyWeights(anchorList2, value);
    verifySources(anchorSources2, value);
  } finally {
    // Release the file handle even if one of the verifications throws.
    reader.close();
  }
}
Use of tl.lin.data.array.ArrayListWritable in project Cloud9 by lintool.
Class IndexableAnchorTextForwardIndex, method getDocument.
/**
 * Fetches the anchor-text record for {@code docno} and renders it into the
 * shared {@code indexableAnchorText} instance.
 *
 * <p>The method binary-searches {@code docnos} for the entry at or immediately
 * before {@code docno}, opens the part file named by the matching
 * {@code filenos} entry, seeks to the matching {@code offsets} entry, and scans
 * forward until a record keyed by {@code docno} is found.
 *
 * @param docno the document number to fetch
 * @return the shared {@code indexableAnchorText} populated from the record, or
 *     {@code null} if {@code docno} precedes every indexed docno, if no record
 *     with that key is found before the end of the file, or if an I/O error
 *     occurs
 */
public IndexableAnchorText getDocument(int docno) {
  int idx = Arrays.binarySearch(docnos, docno);
  if (idx < 0) {
    // binarySearch returns -(insertionPoint) - 1 on a miss; step back to the
    // entry just before the insertion point.
    idx = -idx - 2;
  }
  if (idx < 0) {
    // docno is smaller than the first indexed docno: nothing to seek to.
    return null;
  }

  DecimalFormat df = new DecimalFormat("00000");
  String file = collectionPath + "/part-" + df.format(filenos[idx]);

  try {
    IntWritable key = new IntWritable();
    ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>();
    boolean found = false;

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(file), conf);
    try {
      reader.seek(offsets[idx]);
      while (reader.next(key)) {
        if (key.get() == docno) {
          found = true;
          break;
        }
      }
      // Only read the value when the scan actually stopped on the wanted key;
      // at end of file there is no current record to deserialize.
      if (found) {
        reader.getCurrentValue(value);
      }
    } finally {
      // Close the reader on every path, including I/O failures mid-scan.
      reader.close();
    }

    if (!found) {
      return null;
    }

    indexableAnchorText.createHTML(value);
    return indexableAnchorText;
  } catch (IOException e) {
    e.printStackTrace();
  }
  return null;
}
Aggregations