Use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool:
class AnalyzeBigramRelativeFrequencyTuple, method main.
/**
 * Reads bigram relative-frequency pairs from a SequenceFile directory and
 * prints the top-10 entries whose first token is "light" and whose first
 * token is "contain", each sorted by descending frequency (ties broken by
 * the bigram's natural order).
 *
 * <p>Requires the {@code -input} option naming the SequenceFile directory;
 * exits with status -1 and prints usage when it is missing or unparsable.
 */
@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws Exception {
Options options = new Options();
options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
CommandLine cmdline = null;
CommandLineParser parser = new GnuParser();
try {
cmdline = parser.parse(options, args);
} catch (ParseException exp) {
System.err.println("Error parsing command line: " + exp.getMessage());
System.exit(-1);
}
if (!cmdline.hasOption(INPUT)) {
System.out.println("args: " + Arrays.toString(args));
HelpFormatter formatter = new HelpFormatter();
formatter.setWidth(120);
// Fixed: previously printed the name of the unrelated ...Json class
// (copy-paste from AnalyzeBigramRelativeFrequencyJson).
formatter.printHelp(AnalyzeBigramRelativeFrequencyTuple.class.getName(), options);
ToolRunner.printGenericCommandUsage(System.out);
System.exit(-1);
}
String inputPath = cmdline.getOptionValue(INPUT);
System.out.println("input path: " + inputPath);
List<PairOfWritables<Tuple, FloatWritable>> pairs = SequenceFileUtils.readDirectory(new Path(inputPath));
List<PairOfWritables<Tuple, FloatWritable>> list1 = Lists.newArrayList();
List<PairOfWritables<Tuple, FloatWritable>> list2 = Lists.newArrayList();
for (PairOfWritables<Tuple, FloatWritable> p : pairs) {
Tuple bigram = p.getLeftElement();
// Partition by the bigram's first token; a pair may land in at most one list.
if (bigram.get(0).equals("light")) {
list1.add(p);
}
if (bigram.get(0).equals("contain")) {
list2.add(p);
}
}
// Shared ordering: descending by frequency, then ascending by bigram.
// (Previously this identical anonymous class was duplicated for each list.)
Comparator<PairOfWritables<Tuple, FloatWritable>> byFrequencyDesc =
new Comparator<PairOfWritables<Tuple, FloatWritable>>() {
@SuppressWarnings("unchecked")
public int compare(PairOfWritables<Tuple, FloatWritable> e1, PairOfWritables<Tuple, FloatWritable> e2) {
if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
return e1.getLeftElement().compareTo(e2.getLeftElement());
}
return e2.getRightElement().compareTo(e1.getRightElement());
}
};
Collections.sort(list1, byFrequencyDesc);
Iterator<PairOfWritables<Tuple, FloatWritable>> iter1 = Iterators.limit(list1.iterator(), 10);
while (iter1.hasNext()) {
PairOfWritables<Tuple, FloatWritable> p = iter1.next();
Tuple bigram = p.getLeftElement();
System.out.println(bigram + "\t" + p.getRightElement());
}
Collections.sort(list2, byFrequencyDesc);
Iterator<PairOfWritables<Tuple, FloatWritable>> iter2 = Iterators.limit(list2.iterator(), 10);
while (iter2.hasNext()) {
PairOfWritables<Tuple, FloatWritable> p = iter2.next();
Tuple bigram = p.getLeftElement();
System.out.println(bigram + "\t" + p.getRightElement());
}
}
Use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool:
class HubsAndAuthoritiesSchimmy, method readSums.
/**
 * Reads the authority ("A") and hub ("H") score sums from the SequenceFile
 * at {@code pathIn}.
 *
 * @param jconf  job configuration used to instantiate the reader's key/value classes
 * @param pathIn path to a SequenceFile of (Text key, FloatWritable sum) records,
 *               expected to contain exactly the keys "A" and "H"
 * @return a two-element list: [authority sum, hub sum]
 * @throws Exception if either sum was never read (still at the -1 sentinel;
 *                   assumes legitimate sums are never exactly -1 — TODO confirm)
 */
private ArrayList<Float> readSums(JobConf jconf, String pathIn) throws Exception {
ArrayList<Float> output = new ArrayList<Float>();
// -1 is a "not yet seen" sentinel for each expected key.
float rootSumA = -1;
float rootSumH = -1;
SequenceFile.Reader reader = null;
try {
Configuration cfg = new Configuration();
FileSystem fs = FileSystem.get(cfg);
Path sumsIn = new Path(pathIn);
reader = new SequenceFile.Reader(fs, sumsIn, jconf);
Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), jconf);
FloatWritable value = (FloatWritable) ReflectionUtils.newInstance(reader.getValueClass(), jconf);
while (reader.next(key, value)) {
if (key.toString().equals("A")) {
rootSumA = value.get();
} else if (key.toString().equals("H")) {
rootSumH = value.get();
} else {
// Unexpected key; the sentinel check below will surface a missing sum.
System.out.println("PROBLEM");
}
}
} catch (IOException e) {
// NOTE(review): read errors are only logged here; the sentinel check
// below converts a failed read into the thrown Exception.
e.printStackTrace();
} finally {
IOUtils.closeStream(reader);
}
if (rootSumA == -1 || rootSumH == -1) {
throw new Exception("error: rootSum == - 1");
}
// Float.valueOf instead of the deprecated new Float(...) constructor.
output.add(Float.valueOf(rootSumA));
output.add(Float.valueOf(rootSumH));
return output;
}
Use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool:
class Phrase2CountMap, method readFields.
/**
 * Deserializes this map from {@code in}: clears any existing contents, then
 * reads an entry count followed by that many (Phrase, FloatWritable) pairs.
 */
public void readFields(DataInput in) throws IOException {
this.clear();
final int entryCount = in.readInt();
int remaining = entryCount;
while (remaining-- > 0) {
Phrase phrase = new Phrase();
FloatWritable count = new FloatWritable();
phrase.readFields(in);
count.readFields(in);
this.put(phrase, count);
}
}
Use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool:
class Phrase2CountMap, method plusEquals.
/**
 * Accumulates every count from {@code rhs} into this map in place. Phrases
 * not yet present are inserted with a fresh zero-valued FloatWritable before
 * the addition, so existing FloatWritable instances are mutated, not replaced.
 */
public final void plusEquals(Phrase2CountMap rhs) {
for (Map.Entry<Phrase, FloatWritable> entry : rhs.entrySet()) {
Phrase phrase = entry.getKey();
FloatWritable accumulator = this.get(phrase);
if (accumulator == null) {
// First time we see this phrase: start its running total at zero.
accumulator = new FloatWritable(0);
this.put(phrase, accumulator);
}
accumulator.set(accumulator.get() + entry.getValue().get());
}
}
Use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool:
class BigramRelativeFrequencyIT, method testBigramRelativeFrequencyBase.
/**
 * Integration test: runs the base BigramRelativeFrequency job over the test
 * collection with one reducer, then spot-checks a few (bigram, relative
 * frequency) records from the single output SequenceFile partition.
 */
@Test
public void testBigramRelativeFrequencyBase() throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
assertTrue(fs.exists(collectionPath));
String[] args = new String[] { "hadoop --config src/test/resources/hadoop-local-conf/ jar", IntegrationUtils.getJar("target", "cloud9"), edu.umd.cloud9.example.bigram.BigramRelativeFrequency.class.getCanonicalName(), "-input", collectionPath.toString(), "-output", tmpPrefix + "-base", "-numReducers", "1" };
IntegrationUtils.exec(Joiner.on(" ").join(args));
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(tmpPrefix + "-base/part-r-00000")));
// try/finally so the reader is closed even when an assertion fails
// (previously it leaked on any test failure).
try {
PairOfStrings pair = new PairOfStrings();
FloatWritable f = new FloatWritable();
// Fixed: next() return values were ignored; a truncated output file
// would silently have asserted against stale buffer contents.
assertTrue(reader.next(pair, f));
assertEquals("&c", pair.getLeftElement());
assertEquals("*", pair.getRightElement());
assertEquals(17f, f.get(), 10e-6);
for (int i = 0; i < 100; i++) {
assertTrue(reader.next(pair, f));
}
assertEquals("'dear", pair.getLeftElement());
assertEquals("*", pair.getRightElement());
assertEquals(2f, f.get(), 10e-6);
assertTrue(reader.next(pair, f));
assertEquals("'dear", pair.getLeftElement());
assertEquals("lord", pair.getRightElement());
assertEquals(1f, f.get(), 10e-6);
} finally {
reader.close();
}
}
Aggregations.