Use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.
Class BooleanRetrieval, method run.
/**
 * Runs this tool: parses the command line, initializes retrieval over the given index and
 * collection, and evaluates a fixed list of sample queries, printing each one to stdout.
 *
 * @param args command-line arguments; both the {@code INDEX} and {@code COLLECTION} options
 *     are required
 * @return 0 on success; -1 if the arguments cannot be parsed, a required option is missing, or
 *     the collection path ends with {@code .gz}
 * @throws Exception propagated from file system access, {@code initialize}, or {@code runQuery}
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("index path").create(INDEX));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("collection path").create(COLLECTION));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    // Report failure through the return value instead of killing the JVM, so the
    // caller (e.g. ToolRunner or a test harness) decides what to do with it.
    return -1;
  }

  if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(BooleanRetrieval.class.getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String indexPath = cmdline.getOptionValue(INDEX);
  String collectionPath = cmdline.getOptionValue(COLLECTION);

  // A gzipped file cannot be read at arbitrary offsets, so reject it up front.
  if (collectionPath.endsWith(".gz")) {
    System.out.println("gzipped collection is not seekable: use uncompressed version!");
    return -1;
  }

  FileSystem fs = FileSystem.get(new Configuration());
  initialize(indexPath, collectionPath, fs);

  // Sample queries; they appear to be written in postfix notation (operands first,
  // then the AND/OR operator) -- confirm against runQuery.
  String[] queries = {
      "outrageous fortune AND",
      "white rose AND",
      "means deceit AND",
      "white red OR rose AND pluck AND",
      "unhappy outrageous OR good your AND OR fortune AND" };

  for (String q : queries) {
    System.out.println("Query: " + q);
    runQuery(q);
    System.out.println("");
  }

  // 0 is the conventional "success" exit status for a Tool.
  return 0;
}
Use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.
Class BuildInvertedIndex, method run.
/**
 * Runs this tool: parses the command line, then configures and runs the
 * {@code BuildInvertedIndex} MapReduce job, blocking until it finishes.
 *
 * @param args command-line arguments; the {@code INPUT} and {@code OUTPUT} options are
 *     required, {@code NUM_REDUCERS} is optional and defaults to 1
 * @return 0 if the job succeeds; -1 if the arguments are invalid or the job fails
 * @throws Exception propagated from job setup, file system access, or job execution
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
  options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String inputPath = cmdline.getOptionValue(INPUT);
  String outputPath = cmdline.getOptionValue(OUTPUT);
  int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
      ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

  LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName());
  LOG.info(" - input path: " + inputPath);
  LOG.info(" - output path: " + outputPath);
  LOG.info(" - num reducers: " + reduceTasks);

  Job job = Job.getInstance(getConf());
  job.setJobName(BuildInvertedIndex.class.getSimpleName());
  job.setJarByClass(BuildInvertedIndex.class);
  job.setNumReduceTasks(reduceTasks);

  Path outputDir = new Path(outputPath);
  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputDir);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(PairOfInts.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(PairOfWritables.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);

  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  // Delete the output directory if it exists already.
  FileSystem.get(getConf()).delete(outputDir, true);

  long startTime = System.currentTimeMillis();
  boolean succeeded = job.waitForCompletion(true);
  System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

  // Surface job failure to the caller instead of always reporting success.
  return succeeded ? 0 : -1;
}
Use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.
Class BuildPageRankRecords, method run.
/**
 * Runs this tool: parses the command line, then configures and runs the map-only
 * {@code BuildPageRankRecords} job, blocking until it finishes.
 *
 * @param args command-line arguments; the {@code INPUT}, {@code OUTPUT} and
 *     {@code NUM_NODES} options are all required
 * @return 0 if the job succeeds; -1 if the arguments are invalid or the job fails
 * @throws Exception propagated from job setup, file system access, or job execution
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
  options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String inputPath = cmdline.getOptionValue(INPUT);
  String outputPath = cmdline.getOptionValue(OUTPUT);
  int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));

  LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName());
  LOG.info(" - inputDir: " + inputPath);
  LOG.info(" - outputDir: " + outputPath);
  LOG.info(" - numNodes: " + n);

  Configuration conf = getConf();
  // Pass the node count to the tasks through the job configuration.
  conf.setInt(NODE_CNT_FIELD, n);
  // Request a minimum input split size of 1 GiB.
  conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

  Job job = Job.getInstance(conf);
  job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath);
  job.setJarByClass(BuildPageRankRecords.class);

  // Map-only job: no reduce phase.
  job.setNumReduceTasks(0);

  Path outputDir = new Path(outputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputDir);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(PageRankNode.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(PageRankNode.class);

  job.setMapperClass(MyMapper.class);

  // Delete the output directory if it exists already.
  FileSystem.get(conf).delete(outputDir, true);

  // Surface job failure to the caller instead of always reporting success.
  return job.waitForCompletion(true) ? 0 : -1;
}
Use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.
Class DumpPageRankRecordsToPlainText, method run.
/**
 * Runs this tool: parses the command line, then configures and runs a map-only job that reads
 * sequence-file input and writes it back out through {@code TextOutputFormat}, blocking until
 * the job finishes.
 *
 * @param args command-line arguments; the {@code INPUT} and {@code OUTPUT} options are required
 * @return 0 if the job succeeds; -1 if the arguments are invalid or the job fails
 * @throws Exception propagated from job setup, file system access, or job execution
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String inputPath = cmdline.getOptionValue(INPUT);
  String outputPath = cmdline.getOptionValue(OUTPUT);

  LOG.info("Tool name: " + DumpPageRankRecordsToPlainText.class.getSimpleName());
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);

  // NOTE(review): a fresh Configuration is created here, so any settings the caller placed
  // on this tool's own configuration (e.g. generic -D options) are not seen by the job.
  // Sibling tools use getConf(); confirm whether this one should as well.
  Configuration conf = new Configuration();
  // Request a minimum input split size of 1 GiB.
  conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

  Job job = Job.getInstance(conf);
  job.setJobName(DumpPageRankRecordsToPlainText.class.getSimpleName());
  job.setJarByClass(DumpPageRankRecordsToPlainText.class);

  // Map-only job: no reduce phase. No mapper class is set either, so Hadoop's default
  // (identity) Mapper is used.
  job.setNumReduceTasks(0);

  Path outputDir = new Path(outputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputDir);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(PageRankNode.class);

  // Delete the output directory if it exists already.
  FileSystem.get(conf).delete(outputDir, true);

  // Surface job failure to the caller instead of always reporting success.
  return job.waitForCompletion(true) ? 0 : -1;
}
Use of org.apache.commons.cli.CommandLineParser in project Cloud9 by lintool.
Class FindMaxPageRankNodes, method run.
/**
 * Runs this tool: parses the command line, then configures and runs the
 * {@code FindMaxPageRankNodes} job with a single reducer, blocking until it finishes.
 *
 * @param args command-line arguments; the {@code INPUT}, {@code OUTPUT} and {@code TOP}
 *     options are all required
 * @return 0 if the job succeeds; -1 if the arguments are invalid or the job fails
 * @throws Exception propagated from job setup, file system access, or job execution
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
  options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(TOP)) {
    System.out.println("args: " + Arrays.toString(args));
    HelpFormatter formatter = new HelpFormatter();
    formatter.setWidth(120);
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String inputPath = cmdline.getOptionValue(INPUT);
  String outputPath = cmdline.getOptionValue(OUTPUT);
  int n = Integer.parseInt(cmdline.getOptionValue(TOP));

  LOG.info("Tool name: " + FindMaxPageRankNodes.class.getSimpleName());
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);
  LOG.info(" - top: " + n);

  Configuration conf = getConf();
  // Request a minimum input split size of 1 GiB.
  conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
  // Pass the requested "top n" value to the tasks through the job configuration.
  conf.setInt("n", n);

  Job job = Job.getInstance(conf);
  job.setJobName(FindMaxPageRankNodes.class.getName() + ":" + inputPath);
  job.setJarByClass(FindMaxPageRankNodes.class);

  // All map output goes to one reducer.
  job.setNumReduceTasks(1);

  Path outputDir = new Path(outputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputDir);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(FloatWritable.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(FloatWritable.class);

  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  // Delete the output directory if it exists already.
  FileSystem.get(conf).delete(outputDir, true);

  // Surface job failure to the caller instead of always reporting success.
  return job.waitForCompletion(true) ? 0 : -1;
}
Aggregations