
Example 21 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

The class IterateBfs, method run().

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of partitions").create(NUM_PARTITIONS_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(NUM_PARTITIONS_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_PARTITIONS_OPTION));
    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numPartitions: " + n);
    getConf().set("mapred.child.java.opts", "-Xmx2048m");
    Job job = Job.getInstance(getConf());
    job.setJobName(String.format("IterateBfs[%s: %s, %s: %s, %s: %d]", INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath, NUM_PARTITIONS_OPTION, n));
    job.setJarByClass(EncodeBfsGraph.class);
    job.setNumReduceTasks(n);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BfsNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BfsNode.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReduceClass.class);
    // Delete the output directory if it exists already.
    FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    return 0;
}
Also used: HelpFormatter (org.apache.commons.cli.HelpFormatter), Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), CommandLine (org.apache.commons.cli.CommandLine), GnuParser (org.apache.commons.cli.GnuParser), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException), Job (org.apache.hadoop.mapreduce.Job)
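
OptionBuilder and GnuParser are deprecated as of Commons CLI 1.3. A minimal sketch of the same option setup against the builder API, assuming Commons CLI 1.3+ on the classpath and using literal option names in place of the INPUT_OPTION/OUTPUT_OPTION/NUM_PARTITIONS_OPTION constants (BuilderApiSketch is a hypothetical class name):

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class BuilderApiSketch {
    public static void main(String[] args) {
        Options options = new Options();
        // Option.builder() replaces the deprecated static OptionBuilder chain.
        options.addOption(Option.builder("input").argName("path").hasArg()
                .desc("XML dump file").build());
        options.addOption(Option.builder("output").argName("path").hasArg()
                .desc("output path").build());
        options.addOption(Option.builder("numPartitions").argName("num").hasArg()
                .desc("number of partitions").build());
        try {
            // DefaultParser replaces the deprecated GnuParser.
            CommandLine cmdline = new DefaultParser().parse(options, args);
            System.out.println("input: " + cmdline.getOptionValue("input"));
        } catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
            new HelpFormatter().printHelp(BuilderApiSketch.class.getName(), options);
        }
    }
}

DefaultParser subsumes the old GnuParser and PosixParser behavior, so this is the usual drop-in replacement.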

Example 22 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

The class AnalyzeBigramCount, method main().

@SuppressWarnings({ "static-access" })
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(AnalyzeBigramCount.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    System.out.println("input path: " + inputPath);
    List<PairOfWritables<Text, IntWritable>> bigrams = SequenceFileUtils.readDirectory(new Path(inputPath));
    Collections.sort(bigrams, new Comparator<PairOfWritables<Text, IntWritable>>() {

        public int compare(PairOfWritables<Text, IntWritable> e1, PairOfWritables<Text, IntWritable> e2) {
            if (e2.getRightElement().compareTo(e1.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });
    int singletons = 0;
    int sum = 0;
    for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
        sum += bigram.getRightElement().get();
        if (bigram.getRightElement().get() == 1) {
            singletons++;
        }
    }
    System.out.println("total number of unique bigrams: " + bigrams.size());
    System.out.println("total number of bigrams: " + sum);
    System.out.println("number of bigrams that appear only once: " + singletons);
    System.out.println("\nten most frequent bigrams: ");
    Iterator<PairOfWritables<Text, IntWritable>> iter = Iterators.limit(bigrams.iterator(), 10);
    while (iter.hasNext()) {
        PairOfWritables<Text, IntWritable> bigram = iter.next();
        System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), GnuParser (org.apache.commons.cli.GnuParser), Text (org.apache.hadoop.io.Text), HelpFormatter (org.apache.commons.cli.HelpFormatter), CommandLine (org.apache.commons.cli.CommandLine), PairOfWritables (tl.lin.data.pair.PairOfWritables), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException), IntWritable (org.apache.hadoop.io.IntWritable)
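
The anonymous Comparator above sorts by count descending, breaking ties by the bigram ascending. On Java 8+ the same ordering can be expressed with comparator combinators; a sketch, assuming only the PairOfWritables accessors already shown (BigramSortSketch and sortByCountDescending are hypothetical names):

import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import tl.lin.data.pair.PairOfWritables;

class BigramSortSketch {
    // Same ordering as the anonymous class: count descending, then bigram ascending.
    static void sortByCountDescending(List<PairOfWritables<Text, IntWritable>> bigrams) {
        bigrams.sort(
            Comparator.<PairOfWritables<Text, IntWritable>, IntWritable>comparing(
                    PairOfWritables::getRightElement)
                .reversed()
                .thenComparing(PairOfWritables::getLeftElement));
    }
}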

Example 23 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

The class AnalyzeBigramRelativeFrequencyJson, method main().

@SuppressWarnings({ "static-access" })
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(AnalyzeBigramRelativeFrequencyJson.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    System.out.println("input path: " + inputPath);
    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> pairs = SequenceFileUtils.readDirectory(new Path(inputPath));
    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> list1 = Lists.newArrayList();
    List<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> list2 = Lists.newArrayList();
    for (PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p : pairs) {
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        if (bigram.getJsonObject().get("Left").getAsString().equals("light")) {
            list1.add(p);
        }
        if (bigram.getJsonObject().get("Left").getAsString().equals("contain")) {
            list2.add(p);
        }
    }
    Collections.sort(list1, new Comparator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>>() {

        public int compare(PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e1, PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e2) {
            if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });
    Iterator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> iter1 = Iterators.limit(list1.iterator(), 10);
    while (iter1.hasNext()) {
        PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p = iter1.next();
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
    }
    Collections.sort(list2, new Comparator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>>() {

        public int compare(PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e1, PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> e2) {
            if (e1.getRightElement().compareTo(e2.getRightElement()) == 0) {
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
            return e2.getRightElement().compareTo(e1.getRightElement());
        }
    });
    Iterator<PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable>> iter2 = Iterators.limit(list2.iterator(), 10);
    while (iter2.hasNext()) {
        PairOfWritables<BigramRelativeFrequencyJson.MyTuple, FloatWritable> p = iter2.next();
        BigramRelativeFrequencyJson.MyTuple bigram = p.getLeftElement();
        System.out.println(bigram + "\t" + p.getRightElement());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), GnuParser (org.apache.commons.cli.GnuParser), HelpFormatter (org.apache.commons.cli.HelpFormatter), CommandLine (org.apache.commons.cli.CommandLine), FloatWritable (org.apache.hadoop.io.FloatWritable), PairOfWritables (tl.lin.data.pair.PairOfWritables), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException)
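
The sort-and-print passes over list1 and list2 are verbatim duplicates. A sketch of a shared helper, generic over any pair of mutually comparable elements (TopPairsSketch and printTopPairs are hypothetical names, not part of Cloud9):

import java.util.Iterator;
import java.util.List;
import com.google.common.collect.Iterators;
import tl.lin.data.pair.PairOfWritables;

class TopPairsSketch {
    // Sorts by the right element descending (ties broken by the left element
    // ascending) and prints the top k entries, tab-separated.
    static <L extends Comparable<L>, R extends Comparable<R>> void printTopPairs(
            List<PairOfWritables<L, R>> list, int k) {
        list.sort((e1, e2) -> {
            int cmp = e2.getRightElement().compareTo(e1.getRightElement());
            return cmp != 0 ? cmp : e1.getLeftElement().compareTo(e2.getLeftElement());
        });
        Iterator<PairOfWritables<L, R>> iter = Iterators.limit(list.iterator(), k);
        while (iter.hasNext()) {
            PairOfWritables<L, R> p = iter.next();
            System.out.println(p.getLeftElement() + "\t" + p.getRightElement());
        }
    }
}

With this helper, the two blocks above reduce to printTopPairs(list1, 10) and printTopPairs(list2, 10).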

Example 24 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

The class SequentialPersonalizedPageRank, method main().

@SuppressWarnings({ "static-access" })
public static void main(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("val").hasArg().withDescription("random jump factor").create(JUMP));
    options.addOption(OptionBuilder.withArgName("node").hasArg().withDescription("source node (i.e., destination of the random jump)").create(SOURCE));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCE)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(SequentialPersonalizedPageRank.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String infile = cmdline.getOptionValue(INPUT);
    final String source = cmdline.getOptionValue(SOURCE);
    float alpha = cmdline.hasOption(JUMP) ? Float.parseFloat(cmdline.getOptionValue(JUMP)) : 0.15f;
    int edgeCnt = 0;
    DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>();
    BufferedReader data = new BufferedReader(new InputStreamReader(new FileInputStream(infile)));
    String line;
    while ((line = data.readLine()) != null) {
        // trim() returns a new String; assign the result back or it is discarded.
        line = line.trim();
        String[] arr = line.split("\\t");
        for (int i = 1; i < arr.length; i++) {
            graph.addEdge(Integer.valueOf(edgeCnt++), arr[0], arr[i]);
        }
    }
    data.close();
    if (!graph.containsVertex(source)) {
        System.err.println("Error: source node not found in the graph!");
        System.exit(-1);
    }
    WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<String, Integer>();
    Set<Set<String>> components = clusterer.transform(graph);
    int numComponents = components.size();
    System.out.println("Number of components: " + numComponents);
    System.out.println("Number of edges: " + graph.getEdgeCount());
    System.out.println("Number of nodes: " + graph.getVertexCount());
    System.out.println("Random jump factor: " + alpha);
    // Compute personalized PageRank.
    PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<String, Integer>(graph, new Transformer<String, Double>() {

        @Override
        public Double transform(String vertex) {
            return vertex.equals(source) ? 1.0 : 0.0;
        }
    }, alpha);
    ranker.evaluate();
    // Use priority queue to sort vertices by PageRank values.
    PriorityQueue<Ranking<String>> q = new PriorityQueue<Ranking<String>>();
    int i = 0;
    for (String pmid : graph.getVertices()) {
        q.add(new Ranking<String>(i++, ranker.getVertexScore(pmid), pmid));
    }
    // Print PageRank values.
    System.out.println("\nPageRank of nodes, in descending order:");
    Ranking<String> r = null;
    while ((r = q.poll()) != null) {
        System.out.println(r.rankScore + "\t" + r.getRanked());
    }
}
Also used: Options (org.apache.commons.cli.Options), Set (java.util.Set), GnuParser (org.apache.commons.cli.GnuParser), PageRankWithPriors (edu.uci.ics.jung.algorithms.scoring.PageRankWithPriors), HelpFormatter (org.apache.commons.cli.HelpFormatter), Ranking (edu.uci.ics.jung.algorithms.importance.Ranking), CommandLineParser (org.apache.commons.cli.CommandLineParser), DirectedSparseGraph (edu.uci.ics.jung.graph.DirectedSparseGraph), InputStreamReader (java.io.InputStreamReader), PriorityQueue (java.util.PriorityQueue), FileInputStream (java.io.FileInputStream), WeakComponentClusterer (edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer), CommandLine (org.apache.commons.cli.CommandLine), BufferedReader (java.io.BufferedReader), ParseException (org.apache.commons.cli.ParseException)
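
The reader above is closed manually, which leaks the file handle if readLine() throws mid-loop. A sketch of the same edge loading with try-with-resources, assuming Java 8+ for the no-charset Files.newBufferedReader overload (GraphLoaderSketch and loadGraph are hypothetical names):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import edu.uci.ics.jung.graph.DirectedSparseGraph;

class GraphLoaderSketch {
    // Reads tab-separated adjacency lines ("source<TAB>neighbor<TAB>neighbor...");
    // the reader is closed even if an IOException escapes the loop.
    static DirectedSparseGraph<String, Integer> loadGraph(String infile) throws IOException {
        DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>();
        int edgeCnt = 0;
        try (BufferedReader data = Files.newBufferedReader(Paths.get(infile))) {
            String line;
            while ((line = data.readLine()) != null) {
                String[] arr = line.trim().split("\\t");
                for (int i = 1; i < arr.length; i++) {
                    graph.addEdge(Integer.valueOf(edgeCnt++), arr[0], arr[i]);
                }
            }
        }
        return graph;
    }
}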

Example 25 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

The class ComputeCooccurrenceMatrixStripes, method run().

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;
    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);
    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);
    job.getConfiguration().setInt("window", window);
    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapStIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapStIW.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}
Also used: HelpFormatter (org.apache.commons.cli.HelpFormatter), Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), CommandLine (org.apache.commons.cli.CommandLine), GnuParser (org.apache.commons.cli.GnuParser), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException), Job (org.apache.hadoop.mapreduce.Job)
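
MyReducer is reused as the combiner above; that is only safe because the stripes aggregation is element-wise addition of maps, which is associative and commutative. A sketch of that reduce step, assuming the tl.lin.data.map package path for HMapStIW and that its plus() method sums counts for matching keys (StripesReducerSketch is a hypothetical name):

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import tl.lin.data.map.HMapStIW;

// Merges all partial stripes for a term into one map by element-wise addition.
// Because that operation is associative and commutative, the same class can
// serve as both combiner and reducer.
public class StripesReducerSketch extends Reducer<Text, HMapStIW, Text, HMapStIW> {
    @Override
    public void reduce(Text key, Iterable<HMapStIW> values, Context context)
            throws IOException, InterruptedException {
        HMapStIW map = new HMapStIW();
        for (HMapStIW stripe : values) {
            map.plus(stripe); // assumed API: adds the stripe's counts into map
        }
        context.write(key, map);
    }
}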

Aggregations

HelpFormatter (org.apache.commons.cli.HelpFormatter): 314 usages
Options (org.apache.commons.cli.Options): 163
CommandLine (org.apache.commons.cli.CommandLine): 153
CommandLineParser (org.apache.commons.cli.CommandLineParser): 128
ParseException (org.apache.commons.cli.ParseException): 107
GnuParser (org.apache.commons.cli.GnuParser): 106
Path (org.apache.hadoop.fs.Path): 50
Option (org.apache.commons.cli.Option): 43
PrintWriter (java.io.PrintWriter): 42
Job (org.apache.hadoop.mapreduce.Job): 28
IOException (java.io.IOException): 27
Configuration (org.apache.hadoop.conf.Configuration): 24
File (java.io.File): 22
DefaultParser (org.apache.commons.cli.DefaultParser): 22
PosixParser (org.apache.commons.cli.PosixParser): 15
FileSystem (org.apache.hadoop.fs.FileSystem): 13
BasicParser (org.apache.commons.cli.BasicParser): 12
ArrayList (java.util.ArrayList): 9
ToolOptions (com.google.api.tools.framework.tools.ToolOptions): 6
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6
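
PrintWriter appears in 42 of these co-occurrences, commonly because HelpFormatter also exposes a printHelp overload that writes to a supplied writer instead of System.out. A minimal sketch of that pattern, assuming Commons CLI 1.3+ for Option.builder (HelpToWriterSketch is a hypothetical name):

import java.io.PrintWriter;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class HelpToWriterSketch {
    public static void main(String[] args) {
        Options options = new Options();
        options.addOption(Option.builder("input").argName("path").hasArg()
                .desc("input path").build());
        PrintWriter pw = new PrintWriter(System.out);
        // printHelp(pw, width, cmdLineSyntax, header, options, leftPad, descPad,
        //           footer, autoUsage)
        new HelpFormatter().printHelp(pw, 120, HelpToWriterSketch.class.getName(),
                "Options:", options, 2, 4, "", true);
        pw.flush(); // HelpFormatter does not flush the supplied writer itself
    }
}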