Example 76 with FloatWritable

use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool.

From the class AnalyzeBigramRelativeFrequencyTuple, the method main:

@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(AnalyzeBigramRelativeFrequencyTuple.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    System.out.println("input path: " + inputPath);
    // Load every (bigram, relative frequency) pair from the SequenceFile directory.
    List<PairOfWritables<Tuple, FloatWritable>> pairs = SequenceFileUtils.readDirectory(new Path(inputPath));
    List<PairOfWritables<Tuple, FloatWritable>> list1 = Lists.newArrayList();
    List<PairOfWritables<Tuple, FloatWritable>> list2 = Lists.newArrayList();
    // Collect bigrams whose left word is "light" or "contain".
    for (PairOfWritables<Tuple, FloatWritable> p : pairs) {
        Tuple bigram = p.getLeftElement();
        if (bigram.get(0).equals("light")) {
            list1.add(p);
        }
        if (bigram.get(0).equals("contain")) {
            list2.add(p);
        }
    }
    Collections.sort(list1, new Comparator<PairOfWritables<Tuple, FloatWritable>>() {

        @SuppressWarnings("unchecked")
        public int compare(PairOfWritables<Tuple, FloatWritable> e1, PairOfWritables<Tuple, FloatWritable> e2) {
            if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });
    Iterator<PairOfWritables<Tuple, FloatWritable>> iter1 = Iterators.limit(list1.iterator(), 10);
    while (iter1.hasNext()) {
        PairOfWritables<Tuple, FloatWritable> p = iter1.next();
        Tuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
    }
    Collections.sort(list2, new Comparator<PairOfWritables<Tuple, FloatWritable>>() {

        @SuppressWarnings("unchecked")
        public int compare(PairOfWritables<Tuple, FloatWritable> e1, PairOfWritables<Tuple, FloatWritable> e2) {
            if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });
    Iterator<PairOfWritables<Tuple, FloatWritable>> iter2 = Iterators.limit(list2.iterator(), 10);
    while (iter2.hasNext()) {
        PairOfWritables<Tuple, FloatWritable> p = iter2.next();
        Tuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), GnuParser (org.apache.commons.cli.GnuParser), HelpFormatter (org.apache.commons.cli.HelpFormatter), CommandLine (org.apache.commons.cli.CommandLine), FloatWritable (org.apache.hadoop.io.FloatWritable), PairOfWritables (tl.lin.data.pair.PairOfWritables), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException), Tuple (org.apache.pig.data.Tuple)
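
Both sort calls above duplicate the same anonymous comparator: descending by relative frequency, with ties broken by the bigram itself in ascending order. A minimal sketch of factoring that out, assuming the same PairOfWritables, Tuple, and FloatWritable types as above; the names BigramPrinting, BY_FREQ_DESC, and printTop are illustrative helpers, not part of Cloud9:

import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.io.FloatWritable;
import org.apache.pig.data.Tuple;
import tl.lin.data.pair.PairOfWritables;
import com.google.common.collect.Iterators;

final class BigramPrinting {

    // Descending by frequency; ties broken by the bigram, ascending.
    @SuppressWarnings("unchecked")
    static final Comparator<PairOfWritables<Tuple, FloatWritable>> BY_FREQ_DESC =
            (e1, e2) -> {
                int c = e2.getRightElement().compareTo(e1.getRightElement());
                return c != 0 ? c : e1.getLeftElement().compareTo(e2.getLeftElement());
            };

    // Sort in place and print the top k (bigram, frequency) pairs.
    static void printTop(List<PairOfWritables<Tuple, FloatWritable>> list, int k) {
        list.sort(BY_FREQ_DESC);
        Iterators.limit(list.iterator(), k).forEachRemaining(
                p -> System.out.println(p.getLeftElement() + "\t" + p.getRightElement()));
    }
}

With such a helper, the two sort-then-print passes above collapse to printTop(list1, 10) and printTop(list2, 10).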

Example 77 with FloatWritable

use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool.

From the class HubsAndAuthoritiesSchimmy, the method readSums:

private ArrayList<Float> readSums(JobConf jconf, String pathIn) throws Exception {
    ArrayList<Float> output = new ArrayList<Float>();
    float rootSumA = -1;
    float rootSumH = -1;
    SequenceFile.Reader reader = null;
    try {
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(cfg);
        Path sumsIn = new Path(pathIn);
        reader = new SequenceFile.Reader(fs, sumsIn, jconf);
        Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), jconf);
        FloatWritable value = (FloatWritable) ReflectionUtils.newInstance(reader.getValueClass(), jconf);
        while (reader.next(key, value)) {
            // System.out.printf("%s\t%s\n", key, value);
            if (key.toString().equals("A")) {
                rootSumA = value.get();
            } else if (key.toString().equals("H")) {
                rootSumH = value.get();
            } else {
                System.err.println("Unexpected key in sums file: " + key);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
    if (rootSumA == -1 || rootSumH == -1) {
        throw new Exception("error: rootSum == -1");
    }
    output.add(rootSumA);
    output.add(rootSumH);
    return output;
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), IOException (java.io.IOException), FloatWritable (org.apache.hadoop.io.FloatWritable), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem)
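
The constructor used above, SequenceFile.Reader(fs, path, conf), is deprecated in Hadoop 2.x. A minimal sketch of the same read loop against the option-based constructor (the one the integration test in Example 80 uses), returning a float[] pair instead of boxed Floats; the method name readSumsModern is hypothetical:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

static float[] readSumsModern(Configuration conf, String pathIn) throws IOException {
    float rootSumA = -1;
    float rootSumH = -1;
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(pathIn)));
        // The sums file is expected to hold exactly two records, keyed "A" and "H".
        Text key = new Text();
        FloatWritable value = new FloatWritable();
        while (reader.next(key, value)) {
            if ("A".equals(key.toString())) {
                rootSumA = value.get();
            } else if ("H".equals(key.toString())) {
                rootSumH = value.get();
            }
        }
    } finally {
        IOUtils.closeStream(reader);
    }
    if (rootSumA == -1 || rootSumH == -1) {
        throw new IOException("missing root sum for A or H in " + pathIn);
    }
    return new float[] { rootSumA, rootSumH };
}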

Example 78 with FloatWritable

use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool.

From the class Phrase2CountMap, the method readFields:

public void readFields(DataInput in) throws IOException {
    // Drop any existing entries, then read back the serialized form:
    // an entry count followed by alternating Phrase keys and FloatWritable counts.
    this.clear();
    int size = in.readInt();
    for (int i = 0; i < size; i++) {
        Phrase p = new Phrase();
        FloatWritable c = new FloatWritable();
        p.readFields(in);
        c.readFields(in);
        this.put(p, c);
    }
}
Also used: FloatWritable (org.apache.hadoop.io.FloatWritable)
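
readFields is only half of the Writable contract; the matching write must emit bytes in exactly the order readFields consumes them. A sketch of that half, assuming (as the TreeMap in Example 79's imports suggests) that Phrase2CountMap is a Map<Phrase, FloatWritable>:

public void write(DataOutput out) throws IOException {
    // Mirror readFields: entry count first, then alternating keys and values.
    out.writeInt(this.size());
    for (Map.Entry<Phrase, FloatWritable> e : this.entrySet()) {
        e.getKey().write(out);
        e.getValue().write(out);
    }
}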

Example 79 with FloatWritable

use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool.

From the class Phrase2CountMap, the method plusEquals:

public final void plusEquals(Phrase2CountMap rhs) {
    // Accumulate rhs's counts into this map, inserting a zero-valued cell for unseen phrases.
    for (Map.Entry<Phrase, FloatWritable> ri : rhs.entrySet()) {
        FloatWritable cv = this.get(ri.getKey());
        if (cv == null) {
            cv = new FloatWritable(0);
            this.put(ri.getKey(), cv);
        }
        cv.set(cv.get() + ri.getValue().get());
    }
}
Also used: FloatWritable (org.apache.hadoop.io.FloatWritable), TreeMap (java.util.TreeMap), Map (java.util.Map)
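
Note that plusEquals mutates the stored FloatWritable in place rather than allocating a new one per merge. A hypothetical JUnit-style check of the semantics (the bare new Phrase() follows Example 78; populating a real phrase is left illustrative):

@Test
public void plusEqualsAccumulates() {
    Phrase2CountMap a = new Phrase2CountMap();
    Phrase2CountMap b = new Phrase2CountMap();
    Phrase p = new Phrase();  // illustrative; construct a real phrase here
    a.put(p, new FloatWritable(1.0f));
    b.put(p, new FloatWritable(2.5f));
    a.plusEquals(b);
    // Existing keys are incremented in place; missing keys are inserted.
    assertEquals(3.5f, a.get(p).get(), 1e-6);
}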

Example 80 with FloatWritable

use of org.apache.hadoop.io.FloatWritable in project Cloud9 by lintool.

From the class BigramRelativeFrequencyIT, the method testBigramRelativeFrequencyBase:

@Test
public void testBigramRelativeFrequencyBase() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    assertTrue(fs.exists(collectionPath));
    String[] args = new String[] {
            "hadoop --config src/test/resources/hadoop-local-conf/ jar",
            IntegrationUtils.getJar("target", "cloud9"),
            edu.umd.cloud9.example.bigram.BigramRelativeFrequency.class.getCanonicalName(),
            "-input", collectionPath.toString(),
            "-output", tmpPrefix + "-base",
            "-numReducers", "1" };
    IntegrationUtils.exec(Joiner.on(" ").join(args));
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(new Path(tmpPrefix + "-base/part-r-00000")));
    PairOfStrings pair = new PairOfStrings();
    FloatWritable f = new FloatWritable();
    reader.next(pair, f);
    assertEquals("&c", pair.getLeftElement());
    assertEquals("*", pair.getRightElement());
    assertEquals(17f, f.get(), 10e-6);
    // Skip ahead 100 records to spot-check entries deeper in the sorted output.
    for (int i = 0; i < 100; i++) {
        reader.next(pair, f);
    }
    assertEquals("'dear", pair.getLeftElement());
    assertEquals("*", pair.getRightElement());
    assertEquals(2f, f.get(), 10e-6);
    reader.next(pair, f);
    assertEquals("'dear", pair.getLeftElement());
    assertEquals("lord", pair.getRightElement());
    assertEquals(1f, f.get(), 10e-6);
    reader.close();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), FloatWritable (org.apache.hadoop.io.FloatWritable), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), PairOfStrings (tl.lin.data.pair.PairOfStrings), Test (org.junit.Test)
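
In the output the test reads, rows with "*" on the right appear to hold marginal bigram counts for the left word (17 for "&c"), while ordinary rows hold relative frequencies. A minimal sketch of scanning the whole file and printing only the conditional probabilities, under that assumed layout and reusing conf and tmpPrefix from the test above:

SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path(tmpPrefix + "-base/part-r-00000")));
PairOfStrings pair = new PairOfStrings();
FloatWritable f = new FloatWritable();
// next() reuses pair and f, so copy their contents if you need to keep them.
while (reader.next(pair, f)) {
    if (!"*".equals(pair.getRightElement())) {
        System.out.printf("P(%s | %s) = %f%n",
                pair.getRightElement(), pair.getLeftElement(), f.get());
    }
}
reader.close();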

Aggregations

FloatWritable (org.apache.hadoop.io.FloatWritable): 111 usages
IntWritable (org.apache.hadoop.io.IntWritable): 68 usages
LongWritable (org.apache.hadoop.io.LongWritable): 65 usages
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 54 usages
Text (org.apache.hadoop.io.Text): 51 usages
Test (org.junit.Test): 49 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 44 usages
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 40 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 40 usages
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 37 usages
Writable (org.apache.hadoop.io.Writable): 28 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 27 usages
ArrayList (java.util.ArrayList): 24 usages
Configuration (org.apache.hadoop.conf.Configuration): 18 usages
HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 18 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 18 usages
Path (org.apache.hadoop.fs.Path): 17 usages
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 17 usages
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 17 usages
HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 17 usages