Search in sources :

Example 56 with CommandLineParser

use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.

the class BooleanRetrieval method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(BooleanRetrieval.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);
    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }
    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);
    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND", "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };
    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }
    return 1;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException)

Example 57 with CommandLineParser

use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.

the class BuildInvertedIndex method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);
    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndex.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndex.class);
    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PairOfWritables.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)

Example 58 with CommandLineParser

use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.

the class BuildPageRankRecords method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    Job job = Job.getInstance(conf);
    job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPageRankRecords.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);
    job.setMapperClass(MyMapper.class);
    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)

Example 59 with CommandLineParser

use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.

the class DumpPageRankRecordsToPlainText method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    LOG.info("Tool name: " + DumpPageRankRecordsToPlainText.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    Job job = Job.getInstance(conf);
    job.setJobName(DumpPageRankRecordsToPlainText.class.getSimpleName());
    job.setJarByClass(DumpPageRankRecordsToPlainText.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)

Example 60 with CommandLineParser

use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.

the class FindMaxPageRankNodes method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));
    LOG.info("Tool name: " + FindMaxPageRankNodes.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - top: " + n);
    Configuration conf = getConf();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt("n", n);
    Job job = Job.getInstance(conf);
    job.setJobName(FindMaxPageRankNodes.class.getName() + ":" + inputPath);
    job.setJarByClass(FindMaxPageRankNodes.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)

Aggregations

CommandLineParser (org.apache.commons.cli.CommandLineParser)265 CommandLine (org.apache.commons.cli.CommandLine)246 Options (org.apache.commons.cli.Options)206 ParseException (org.apache.commons.cli.ParseException)186 GnuParser (org.apache.commons.cli.GnuParser)158 HelpFormatter (org.apache.commons.cli.HelpFormatter)111 PosixParser (org.apache.commons.cli.PosixParser)61 Option (org.apache.commons.cli.Option)52 IOException (java.io.IOException)48 Path (org.apache.hadoop.fs.Path)42 File (java.io.File)41 DefaultParser (org.apache.commons.cli.DefaultParser)29 Job (org.apache.hadoop.mapreduce.Job)27 Configuration (org.apache.hadoop.conf.Configuration)19 FileInputStream (java.io.FileInputStream)16 Properties (java.util.Properties)15 ArrayList (java.util.ArrayList)14 BasicParser (org.apache.commons.cli.BasicParser)14 FileSystem (org.apache.hadoop.fs.FileSystem)12 URI (java.net.URI)10