Use of tl.lin.data.fd.Int2IntFrequencyDistributionEntry in project Cloud9 by lintool.
The method lookupTerm of the class LookupPostings.
/**
 * Looks up {@code term} in the index behind {@code reader} and prints its postings to
 * standard output.
 *
 * <p>For every posting the method seeks the collection file to the posting's left element,
 * reads one line from that position and prints it (truncated to 80 characters). The right
 * element of every posting is tallied and printed afterwards as a histogram of tf values.
 * If the term is not present in the index, a message is printed and nothing else happens.
 *
 * @param term the term to look up
 * @param reader reader over the index; queried with {@code term} as a {@code Text} key
 * @param collectionPath path of the collection file that the postings' left elements seek into
 * @param fs file system used to open {@code collectionPath}
 * @throws IOException if the collection cannot be opened, or if the lookup, a seek or a
 *     read fails
 */
public static void lookupTerm(String term, MapFile.Reader reader, String collectionPath, FileSystem fs) throws IOException {
  FSDataInputStream collection = fs.open(new Path(collectionPath));
  try {
    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();
    key.set(term);

    // The original only null-checked the lookup result, so it is not kept in a local.
    if (reader.get(key, value) == null) {
      System.out.println("\nThe term '" + term + "' does not appear in the collection");
      return;
    }

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    System.out.println("\nComplete postings list for '" + term + "':");
    System.out.println("df = " + value.getLeftElement());

    Int2IntFrequencyDistribution hist = new Int2IntFrequencyDistributionEntry();
    for (PairOfInts pair : postings) {
      hist.increment(pair.getRightElement());
      System.out.print(pair);

      collection.seek(pair.getLeftElement());
      // A fresh reader is needed after every seek: BufferedReader reads ahead, so reusing
      // one would return stale buffered data. It is deliberately not closed here, because
      // closing it would also close the shared collection stream.
      BufferedReader r = new BufferedReader(new InputStreamReader(collection));
      String d = r.readLine();
      if (d == null) {
        // readLine() returns null at end of stream; report it instead of failing with an NPE.
        d = "(no text at this offset)";
      } else if (d.length() > 80) {
        d = d.substring(0, 80) + "...";
      }
      System.out.println(": " + d);
    }

    System.out.println("\nHistogram of tf values for '" + term + "'");
    for (PairOfInts pair : hist) {
      System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }
  } finally {
    // Release the stream on every exit path, including the "term not found" early return
    // and any IOException thrown while looking up, seeking or reading.
    collection.close();
  }
}
Aggregations