Use of tl.lin.data.pair.PairOfInts in project Cloud9 by lintool.
Class BooleanRetrieval, method fetchPostings.
/**
 * Looks up {@code term} in {@code index} and returns the postings part of the
 * stored value.
 *
 * @param term the term to look up
 * @return the right element of the pair handed to {@code index.get}
 * @throws IOException if the index lookup fails
 */
private ArrayListWritable<PairOfInts> fetchPostings(String term) throws IOException {
  // Holder that index.get populates; the right element is the postings list.
  PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> entry =
      new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

  Text lookupKey = new Text();
  lookupKey.set(term);

  index.get(lookupKey, entry);

  ArrayListWritable<PairOfInts> postings = entry.getRightElement();
  return postings;
}
Use of tl.lin.data.pair.PairOfInts in project Cloud9 by lintool.
Class InvertedIndexingIT, method testInvertedIndexing.
/**
 * Runs {@code BuildInvertedIndex} over the test collection with a single
 * reducer, then opens the resulting {@code part-r-00000} MapFile and
 * spot-checks the entry stored for the term "gold".
 *
 * @throws Exception if the file system, the external job, or the MapFile
 *     lookup fails
 */
@Test
public void testInvertedIndexing() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(collectionPath));

  String[] args = new String[] {
      "hadoop --config src/test/resources/hadoop-local-conf/ jar",
      IntegrationUtils.getJar("target", "cloud9"),
      edu.umd.cloud9.example.ir.BuildInvertedIndex.class.getCanonicalName(),
      "-input", collectionPath.toString(),
      "-output", tmpPrefix,
      "-numReducers", "1" };
  IntegrationUtils.exec(Joiner.on(" ").join(args));

  MapFile.Reader reader = new MapFile.Reader(new Path(tmpPrefix + "/part-r-00000"), conf);
  try {
    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value =
        new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();
    key.set("gold");

    // A missing term should surface as an assertion failure, not as a
    // NullPointerException on the unpopulated value below.
    assertTrue(reader.get(key, value) != null);

    assertEquals(584, value.getLeftElement().get());

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    assertEquals(5303, postings.get(0).getLeftElement());
    assertEquals(684030, postings.get(100).getLeftElement());
    assertEquals(1634312, postings.get(200).getLeftElement());
  } finally {
    // Release the reader even when one of the assertions above fails.
    reader.close();
  }
}
Use of tl.lin.data.pair.PairOfInts in project Cloud9 by lintool.
Class LookupPostings, method lookupTerm.
/**
 * Prints the complete postings list for {@code term} followed by a histogram
 * of the right elements of its postings (labelled "tf" in the output).
 *
 * <p>For every posting the collection file is positioned at the posting's
 * left element and the line found there is printed, truncated to 80
 * characters. If the term is not present in {@code reader}, a message is
 * printed and nothing else happens.
 *
 * @param term the term to look up
 * @param reader the MapFile reader holding the index
 * @param collectionPath path of the collection file that postings point into
 * @param fs file system used to open {@code collectionPath}
 * @throws IOException if the collection or the index cannot be read
 */
public static void lookupTerm(String term, MapFile.Reader reader, String collectionPath, FileSystem fs) throws IOException {
  FSDataInputStream collection = fs.open(new Path(collectionPath));
  try {
    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value =
        new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();
    key.set(term);

    Writable w = reader.get(key, value);
    if (w == null) {
      System.out.println("\nThe term '" + term + "' does not appear in the collection");
      return;
    }

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    System.out.println("\nComplete postings list for '" + term + "':");
    System.out.println("df = " + value.getLeftElement());

    Int2IntFrequencyDistribution hist = new Int2IntFrequencyDistributionEntry();
    for (PairOfInts pair : postings) {
      hist.increment(pair.getRightElement());
      System.out.print(pair);

      // A fresh BufferedReader per posting is deliberate: the previous one may
      // have buffered past the old position, so it cannot be reused after seek.
      collection.seek(pair.getLeftElement());
      BufferedReader r = new BufferedReader(new InputStreamReader(collection));
      String d = r.readLine();
      if (d == null) {
        // readLine returns null at end of stream; print an empty excerpt
        // instead of failing with a NullPointerException.
        d = "";
      }
      d = d.length() > 80 ? d.substring(0, 80) + "..." : d;
      System.out.println(": " + d);
    }

    System.out.println("\nHistogram of tf values for '" + term + "'");
    for (PairOfInts pair : hist) {
      System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }
  } finally {
    // Close the collection on every path, including the early return above
    // and any exception thrown while reading.
    collection.close();
  }
}
Aggregations