Example 46 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

From the class BigramRelativeFrequencyJson, method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    LOG.info("Tool name: " + BigramRelativeFrequencyJson.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);
    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyJson.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyJson.class);
    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setMapOutputKeyClass(MyTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(MyTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)
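
A note on API currency: OptionBuilder and GnuParser, used throughout these examples, are deprecated as of Commons CLI 1.3 in favor of Option.builder() and DefaultParser (both of which also appear in the aggregation counts below). Here is a minimal sketch of the same define/parse/help pattern in the newer API; the literal option names "input", "output", and "numReducers" are illustrative stand-ins for the tool's INPUT, OUTPUT, and NUM_REDUCERS constants:

import org.apache.commons.cli.*;

public class ModernCliSketch {
    public static void main(String[] args) {
        Options options = new Options();
        options.addOption(Option.builder("input").argName("path").hasArg().desc("input path").build());
        options.addOption(Option.builder("output").argName("path").hasArg().desc("output path").build());
        options.addOption(Option.builder("numReducers").argName("num").hasArg().desc("number of reducers").build());
        try {
            // DefaultParser replaces the deprecated GnuParser/PosixParser pair.
            CommandLine cmdline = new DefaultParser().parse(options, args);
            if (!cmdline.hasOption("input") || !cmdline.hasOption("output")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.setWidth(120);
                formatter.printHelp(ModernCliSketch.class.getName(), options);
                return;
            }
            System.out.println("input = " + cmdline.getOptionValue("input"));
        } catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
        }
    }
}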

Example 47 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

From the class BigramRelativeFrequencyTuple, method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);
    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequencyTuple.class);
    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setMapOutputKeyClass(BinSedesTuple.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(BinSedesTuple.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)
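
Examples 46 and 47 differ only in the key class (MyTuple vs. BinSedesTuple) and the job name; the command-line handling is repeated verbatim. Purely as a refactoring sketch, not code that exists in Cloud9, the shared boilerplate could be pulled into a helper like the hypothetical parseToolArgs below, which reuses the tool's existing INPUT, OUTPUT, and NUM_REDUCERS constants:

// Hypothetical helper, not part of Cloud9: centralizes the option definition,
// parsing, and help printing that each tool currently repeats.
@SuppressWarnings({ "static-access" })
static CommandLine parseToolArgs(String[] args, String toolName) {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    try {
        CommandLine cmdline = new GnuParser().parse(options, args);
        if (cmdline.hasOption(INPUT) && cmdline.hasOption(OUTPUT)) {
            return cmdline;
        }
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(toolName, options);
        ToolRunner.printGenericCommandUsage(System.out);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
    }
    return null; // callers treat null as "abort with a non-zero exit status"
}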

Example 48 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

From the class IterateGMM, method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath0 = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;
    LOG.info("Tool: " + IterateGMM.class.getSimpleName());
    LOG.info(" - input path: " + inputPath0);
    String inputPath = inputPath0 + "/points";
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    int iterations = 0;
    Configuration conf = getConf();
    while (iterations == 0 || !FinishIteration(inputPath0, iterations, conf)) {
        LOG.info("** iterations: " + iterations);
        try {
            Job job = Job.getInstance(conf);
            job.setJobName(IterateGMM.class.getSimpleName());
            job.setJarByClass(IterateGMM.class);
            // set the path of the information of k clusters in this iteration
            job.getConfiguration().set("clusterpath", inputPath0 + "/cluster" + iterations);
            job.setNumReduceTasks(reduceTasks);
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(PairOfStrings.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            job.setPartitionerClass(MyPartitioner.class);
            // Delete the output directory if it exists already.
            Path outputDir = new Path(outputPath);
            FileSystem.get(getConf()).delete(outputDir, true);
            long startTime = System.currentTimeMillis();
            job.waitForCompletion(true);
            LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
            reNameFile(inputPath0, outputPath, iterations + 1, conf, reduceTasks);
        } catch (Exception exp) {
            exp.printStackTrace();
        }
        iterations++;
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) IOException(java.io.IOException) ParseException(org.apache.commons.cli.ParseException) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) Job(org.apache.hadoop.mapreduce.Job)
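
The driver above keeps re-running the same MapReduce job until FinishIteration reports convergence; the actual check lives elsewhere in Cloud9 and is not shown here. Purely as an illustration of the pattern, a hypothetical check could compare the cluster parameters written by consecutive iterations (the one-line-per-component "id weight mean variance" format matches what LocalClusteringDriver in Example 49 writes to cluster0) and stop once they agree within a tolerance. The sketch assumes the usual java.io and Hadoop FileSystem imports:

// Hypothetical convergence check, NOT the Cloud9 implementation.
// Assumes iteration i leaves its parameters in <base>/cluster<i>.
static boolean finishIterationSketch(String base, int iteration, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path prev = new Path(base + "/cluster" + (iteration - 1));
    Path curr = new Path(base + "/cluster" + iteration);
    if (!fs.exists(prev) || !fs.exists(curr)) {
        return false; // not enough history to compare yet
    }
    double delta = 0.0;
    try (BufferedReader a = new BufferedReader(new InputStreamReader(fs.open(prev)));
         BufferedReader b = new BufferedReader(new InputStreamReader(fs.open(curr)))) {
        String la, lb;
        while ((la = a.readLine()) != null && (lb = b.readLine()) != null) {
            String[] pa = la.split(" ");
            String[] pb = lb.split(" ");
            // Skip field 0 (the component id); sum absolute parameter changes.
            for (int i = 1; i < Math.min(pa.length, pb.length); i++) {
                delta += Math.abs(Double.parseDouble(pa[i]) - Double.parseDouble(pb[i]));
            }
        }
    }
    return delta < 1e-6; // converged once the parameters stop moving
}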

Example 49 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

From the class LocalClusteringDriver, method main.

//  private static final String input="points_input";
@SuppressWarnings({ "static-access" })
public static void main(String[] args) {
    Options options = new Options();
    options.addOption(new Option(KMEANS, "initialize with k-means"));
    options.addOption(new Option(HELP, "display help options"));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("input path").create(POINTS));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("output path").create(COMPONENTS));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("result path").create(OUTPUT));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        // Bail out here: cmdline is still null and would throw an NPE below.
        System.exit(-1);
    }
    // Missing required output path, or help explicitly requested: print usage.
    if (!cmdline.hasOption(OUTPUT) || cmdline.hasOption(HELP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LocalClusteringDriver.class.getName(), options);
        System.exit(-1);
    }
    int numComponents = cmdline.hasOption(COMPONENTS) ? Integer.parseInt(cmdline.getOptionValue(COMPONENTS)) : 3;
    int numPoints = cmdline.hasOption(POINTS) ? Integer.parseInt(cmdline.getOptionValue(POINTS)) : 100000;
    String output = cmdline.getOptionValue(OUTPUT);
    System.out.println(output);
    System.out.println("Number of points: " + numPoints);
    System.out.println("Number of components in mixture: " + numComponents);
    UnivariateGaussianMixtureModel sourceModel = new UnivariateGaussianMixtureModel(numComponents);
    for (int i = 0; i < numComponents; i++) {
        PVector param = new PVector(2);
        param.array[0] = RANDOM.nextInt(100);
        param.array[1] = RANDOM.nextFloat() * 3;
        sourceModel.param[i] = param;
        sourceModel.weight[i] = RANDOM.nextInt(10) + 1;
    }
    sourceModel.normalizeWeights();
    System.out.println("Initial mixture model:\n" + sourceModel + "\n");
    // Draw points from initial mixture model and compute the n clusters
    Point[] points = sourceModel.drawRandomPoints(numPoints);
    UnivariateGaussianMixtureModel learnedModel = null;
    if (cmdline.hasOption(KMEANS)) {
        System.out.println("Running k-means to initialize clusters...");
        List<Point>[] clusters = KMeans.run(points, numComponents);
        double[] means = new double[numComponents];
        int cnt = 0;
        for (List<Point> cluster : clusters) {
            double tmp = 0.0;
            for (Point p : cluster) {
                tmp += p.value;
            }
            means[cnt] = tmp / cluster.size();
            cnt++;
        }
        System.out.println("Cluster means: " + Arrays.toString(means) + "\n");
        learnedModel = ExpectationMaximization.initialize(points, means);
    } else {
        learnedModel = ExpectationMaximization.initialize(points, numComponents);
    }
    Path outputPoi = new Path(output);
    try {
        FileSystem fs = FileSystem.get(new Configuration());
        fs.delete(outputPoi, true);
        FSDataOutputStream pointfile = fs.create(new Path(output + "/points"));
        for (int i = 0; i < numPoints; i++) {
            pointfile.write((Double.toString(points[i].value) + "\n").getBytes());
        }
        pointfile.flush();
        pointfile.close();
        FSDataOutputStream clusterfile = fs.create(new Path(output + "/cluster0"));
        for (int i = 0; i < numComponents; i++) {
            clusterfile.write((i + " " + Double.toString(learnedModel.weight[i]) + " " + learnedModel.param[i].array[0] + " " + learnedModel.param[i].array[1] + "\n").getBytes());
        }
        clusterfile.flush();
        clusterfile.close();
    } catch (IOException exp) {
        exp.printStackTrace();
    }
    System.out.println("** Ready to run EM **\n");
    System.out.println("Initial mixture model:\n" + learnedModel + "\n");
    learnedModel = ExpectationMaximization.run(points, learnedModel);
    System.out.println("Mixure model estimated using EM: \n" + learnedModel + "\n");
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) IOException(java.io.IOException) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) FileSystem(org.apache.hadoop.fs.FileSystem) Option(org.apache.commons.cli.Option) List(java.util.List) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream)
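
UnivariateGaussianMixtureModel, PVector, KMeans, and ExpectationMaximization are Cloud9-local classes, so the snippet above is not self-contained. For readers without the project checked out, here is a minimal standalone sketch of the idea behind drawRandomPoints, sampling from a univariate Gaussian mixture; the weights, means, and standard deviations are made-up illustrative values:

import java.util.Random;

// Standalone sketch of mixture sampling; not Cloud9's implementation.
public class MixtureSamplerSketch {
    public static void main(String[] args) {
        double[] weight = { 0.5, 0.3, 0.2 };  // normalized component weights
        double[] mean = { 10.0, 50.0, 90.0 };
        double[] stddev = { 1.0, 2.0, 0.5 };
        Random random = new Random(42);
        for (int n = 0; n < 5; n++) {
            // Pick a component in proportion to its weight...
            double u = random.nextDouble();
            int k = 0;
            double acc = weight[0];
            while (u > acc && k < weight.length - 1) {
                k++;
                acc += weight[k];
            }
            // ...then draw from that component's Gaussian.
            double x = mean[k] + stddev[k] * random.nextGaussian();
            System.out.println("component " + k + " -> " + x);
        }
    }
}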

Example 50 with HelpFormatter

Use of org.apache.commons.cli.HelpFormatter in project Cloud9 by lintool.

From the class BooleanRetrieval, method run.

/**
   * Runs this tool.
   */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }
    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(BooleanRetrieval.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }
    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);
    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }
    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);
    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND", "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };
    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }
    return 1;
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException)
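
The hard-coded queries above are in postfix (reverse Polish) notation: "white rose AND" means white AND rose, with each operator applying to the two preceding operands. The actual evaluation happens in runQuery, which is defined elsewhere in BooleanRetrieval. To make the pattern concrete, here is a minimal stack-based postfix evaluator over in-memory posting sets; the toy postings map is invented for illustration:

import java.util.*;

// Standalone sketch of postfix boolean query evaluation over doc-id sets.
// The real BooleanRetrieval reads postings from an on-disk index instead.
public class PostfixQuerySketch {
    public static void main(String[] args) {
        Map<String, Set<Integer>> postings = new HashMap<>();
        postings.put("white", new TreeSet<>(Arrays.asList(1, 2, 5)));
        postings.put("rose", new TreeSet<>(Arrays.asList(2, 3, 5)));
        postings.put("red", new TreeSet<>(Arrays.asList(4, 5)));
        System.out.println(evaluate("white rose AND", postings));        // [2, 5]
        System.out.println(evaluate("white red OR rose AND", postings)); // [2, 5]
    }

    static Set<Integer> evaluate(String query, Map<String, Set<Integer>> postings) {
        Deque<Set<Integer>> stack = new ArrayDeque<>();
        for (String token : query.split("\\s+")) {
            if (token.equals("AND") || token.equals("OR")) {
                Set<Integer> right = stack.pop();
                Set<Integer> left = stack.pop();
                Set<Integer> result = new TreeSet<>(left);
                if (token.equals("AND")) {
                    result.retainAll(right); // intersection
                } else {
                    result.addAll(right);    // union
                }
                stack.push(result);
            } else {
                // A term pushes its posting list (empty if never indexed).
                stack.push(postings.getOrDefault(token, new TreeSet<>()));
            }
        }
        return stack.pop();
    }
}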

Aggregations

HelpFormatter (org.apache.commons.cli.HelpFormatter): 273 usages
Options (org.apache.commons.cli.Options): 136 usages
CommandLine (org.apache.commons.cli.CommandLine): 126 usages
CommandLineParser (org.apache.commons.cli.CommandLineParser): 110 usages
ParseException (org.apache.commons.cli.ParseException): 103 usages
GnuParser (org.apache.commons.cli.GnuParser): 92 usages
Path (org.apache.hadoop.fs.Path): 42 usages
PrintWriter (java.io.PrintWriter): 35 usages
Option (org.apache.commons.cli.Option): 29 usages
Job (org.apache.hadoop.mapreduce.Job): 27 usages
Configuration (org.apache.hadoop.conf.Configuration): 21 usages
File (java.io.File): 17 usages
IOException (java.io.IOException): 14 usages
DefaultParser (org.apache.commons.cli.DefaultParser): 13 usages
PosixParser (org.apache.commons.cli.PosixParser): 12 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 12 usages
BasicParser (org.apache.commons.cli.BasicParser): 11 usages
ArrayList (java.util.ArrayList): 8 usages
URI (java.net.URI): 6 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6 usages