use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.
the class DocumentForwardIndexHttpServer method run.
/**
 * Submits a map-only job running {@code MyMapper} for the document forward index HTTP
 * server, then waits until a host name can be read from a rendezvous file.
 *
 * Both the forward index path and the DocnoMapping data path are required; if either is
 * missing, usage is printed and -1 is returned. The job is submitted without waiting for
 * its completion. This method then polls a randomly named file under /tmp until it exists
 * and reads a host name from it (presumably written by the mapper once the server is up;
 * confirm against MyMapper).
 *
 * @param args command-line arguments
 * @return 0 once the host has been read and logged; -1 if the arguments cannot be parsed
 *         or a required option is missing
 * @throws Exception if file system access, job submission, or the wait fails
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) forward index path").create(INDEX_OPTION));
  options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) DocnoMapping data path").create(MAPPING_OPTION));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    return -1;
  }

  if (!cmdline.hasOption(INDEX_OPTION) || !cmdline.hasOption(MAPPING_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(this.getClass().getName(), options);
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String indexFile = cmdline.getOptionValue(INDEX_OPTION);
  String mappingFile = cmdline.getOptionValue(MAPPING_OPTION);

  LOG.info("Launching DocumentForwardIndexHttpServer");
  LOG.info(" - index file: " + indexFile);
  LOG.info(" - docno mapping data file: " + mappingFile);

  Configuration conf = getConf();
  FileSystem fs = FileSystem.get(conf);

  Random rand = new Random();
  int r = rand.nextInt();

  // This tmp file serves as a rendezvous point; remove any stale file of the same name so
  // that its existence can only come from the job submitted below.
  Path tmpPath = new Path("/tmp/" + r);
  if (fs.exists(tmpPath)) {
    fs.delete(tmpPath, true);
  }

  Job job = new Job(conf, DocumentForwardIndexHttpServer.class.getSimpleName());
  job.setJarByClass(DocumentForwardIndexHttpServer.class);
  job.getConfiguration().set("mapred.child.java.opts", "-Xmx1024m");
  job.getConfiguration().set(INDEX_KEY, indexFile);
  job.getConfiguration().set(DOCNO_MAPPING_KEY, mappingFile);
  job.getConfiguration().set(TMP_KEY, tmpPath.toString());

  // Map-only job with no real input or output.
  job.setNumReduceTasks(0);
  job.setInputFormatClass(NullInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(MyMapper.class);

  // Submit without blocking on completion; the loop below waits for the rendezvous file.
  job.submit();

  LOG.info("Waiting for server to start up...");
  while (!fs.exists(tmpPath)) {
    // Poll every 50 seconds.
    Thread.sleep(50000);
    LOG.info("...");
  }

  // Close the stream even if reading fails so the handle is not leaked.
  String host;
  FSDataInputStream in = fs.open(tmpPath);
  try {
    host = in.readUTF();
  } finally {
    in.close();
  }

  LOG.info("host: " + host);
  LOG.info("port: 8888");
  return 0;
}
use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.
the class HadoopAlign method main.
/**
 * Command-line entry point: parses the alignment options, builds a
 * {@code HadoopAlignConfig}, and hands it to {@code doAlignment}.
 *
 * The input path, work directory, and both language codes are required options. At least
 * one of the Model 1 / HMM iteration counts must be positive and neither may be negative.
 * On any invalid argument an error and the usage text are printed and the method returns
 * without running the alignment.
 *
 * @param args command-line arguments
 * @throws IOException declared by the original signature; presumably propagated from
 *         {@code doAlignment} (confirm against that method)
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
  options = new Options();
  options.addOption(OptionBuilder.withDescription("path to XML-formatted parallel corpus").withArgName("path").hasArg().isRequired().create(INPUT_OPTION));
  options.addOption(OptionBuilder.withDescription("path to work/output directory on HDFS").withArgName("path").hasArg().isRequired().create(WORK_OPTION));
  options.addOption(OptionBuilder.withDescription("two-letter collection language code").withArgName("en|de|fr|zh|es|ar|tr").hasArg().isRequired().create(FLANG_OPTION));
  options.addOption(OptionBuilder.withDescription("two-letter collection language code").withArgName("en|de|fr|zh|es|ar|tr").hasArg().isRequired().create(ELANG_OPTION));
  options.addOption(OptionBuilder.withDescription("number of IBM Model 1 iterations").withArgName("positive integer").hasArg().create(MODEL1_OPTION));
  options.addOption(OptionBuilder.withDescription("number of HMM iterations").withArgName("positive integer").hasArg().create(HMM_OPTION));
  options.addOption(OptionBuilder.withDescription("truncate/stem text or not").create(TRUNCATE_OPTION));
  options.addOption(OptionBuilder.withDescription("number of reducers").withArgName("positive integer").hasArg().create(REDUCE_OPTION));
  options.addOption(OptionBuilder.withDescription("Hadoop option to load external jars").withArgName("jar packages").hasArg().create(LIBJARS_OPTION));

  CommandLine cmdline;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    printUsage();
    System.err.println("Error parsing command line: " + exp.getMessage());
    return;
  }

  String bitextPath = cmdline.getOptionValue(INPUT_OPTION);
  String workDir = cmdline.getOptionValue(WORK_OPTION);
  String srcLang = cmdline.getOptionValue(FLANG_OPTION);
  String trgLang = cmdline.getOptionValue(ELANG_OPTION);

  // Parse all numeric options together so a malformed value yields a usage message
  // instead of an uncaught NumberFormatException.
  int model1Iters;
  int hmmIters;
  int numReducers;
  try {
    model1Iters = cmdline.hasOption(MODEL1_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MODEL1_OPTION)) : 0;
    hmmIters = cmdline.hasOption(HMM_OPTION) ? Integer.parseInt(cmdline.getOptionValue(HMM_OPTION)) : 0;
    numReducers = cmdline.hasOption(REDUCE_OPTION) ? Integer.parseInt(cmdline.getOptionValue(REDUCE_OPTION)) : 50;
  } catch (NumberFormatException nfe) {
    System.err.println("Error parsing command line: expected an integer argument (" + nfe.getMessage() + ")");
    printUsage();
    return;
  }

  // Reject negative counts as well as the "no iterations at all" case; a plain sum check
  // would let a negative value through.
  if (model1Iters < 0 || hmmIters < 0 || (model1Iters == 0 && hmmIters == 0)) {
    System.err.println("Please enter a positive number of iterations for either Model 1 or HMM");
    printUsage();
    return;
  }

  boolean isTruncate = cmdline.hasOption(TRUNCATE_OPTION);

  HadoopAlignConfig hac = new HadoopAlignConfig(workDir, trgLang, srcLang, bitextPath,
      model1Iters, hmmIters,
      true, // use null word
      false, // use variational bayes
      isTruncate, // use word truncation
      0.00f); // alpha
  hac.setHMMHomogeneous(false);
  hac.set("mapreduce.map.memory.mb", "2048");
  hac.set("mapreduce.map.java.opts", "-Xmx2048m");
  hac.set("mapreduce.reduce.memory.mb", "2048");
  hac.set("mapreduce.reduce.java.opts", "-Xmx2048m");
  hac.setHMMp0(0.2);
  hac.setMaxSentLen(15);

  // NOTE(review): the first argument (50) is presumably the number of map tasks; confirm
  // against doAlignment's signature.
  doAlignment(50, numReducers, hac);
}
use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.
the class FileMerger method run.
/**
 * Parses the command line and merges the input file or directory into the output file.
 *
 * The input and output options are required. The mapper option selects the number of
 * mappers; a missing or non-positive value falls back to 0 (local merge mode).
 *
 * TODO: add in hadoop configuration
 *
 * @param args command-line arguments
 * @return 0 when the merge completes or only help was requested; -1 when the arguments
 *         are invalid or the merge fails with a reflection error
 * @throws IOException declared by the original signature; presumably propagated from
 *         {@code merge} (confirm against that method)
 */
@Override
public int run(String[] args) throws IOException {
  Options options = new Options();
  options.addOption(HELP_OPTION, false, "print the help message");
  options.addOption(OptionBuilder.withArgName(PATH_INDICATOR).hasArg().withDescription("input file or directory").create(INPUT_OPTION));
  options.addOption(OptionBuilder.withArgName(PATH_INDICATOR).hasArg().withDescription("output file").create(OUTPUT_OPTION));
  options.addOption(OptionBuilder.withArgName(INTEGER_INDICATOR).hasArg().withDescription("number of mappers (default to 0 and hence local merge mode, set to positive value to enable cluster merge mode)").create(MAPPER_OPTION));
  options.addOption(OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator().withDescription("assign value for given property").create("D"));
  options.addOption(TEXT_FILE_INPUT_FORMAT, false, "input file in sequence format");
  options.addOption(DELETE_SOURCE_OPTION, false, "delete sources after merging");

  int mapperTasks = 0;
  boolean deleteSource = DELETE_SOURCE;
  boolean textFileFormat = TEXT_FILE_INPUT;
  String inputPath = "";
  String outputPath = "";

  GenericOptionsParser genericOptionsParser = new GenericOptionsParser(args);
  Configuration configuration = genericOptionsParser.getConfiguration();

  CommandLineParser parser = new GnuParser();
  HelpFormatter formatter = new HelpFormatter();
  try {
    CommandLine line = parser.parse(options, args);

    if (line.hasOption(HELP_OPTION)) {
      formatter.printHelp(FileMerger.class.getName(), options);
      // Help is a successful outcome; report it through the return code.
      return 0;
    }

    if (line.hasOption(INPUT_OPTION)) {
      inputPath = line.getOptionValue(INPUT_OPTION);
    } else {
      throw new ParseException("Parsing failed due to " + INPUT_OPTION + " not initialized...");
    }

    if (line.hasOption(OUTPUT_OPTION)) {
      outputPath = line.getOptionValue(OUTPUT_OPTION);
    } else {
      throw new ParseException("Parsing failed due to " + OUTPUT_OPTION + " not initialized...");
    }

    if (line.hasOption(MAPPER_OPTION)) {
      mapperTasks = Integer.parseInt(line.getOptionValue(MAPPER_OPTION));
      if (mapperTasks <= 0) {
        sLogger.info("Warning: " + MAPPER_OPTION + " is not positive, merge in local model...");
        mapperTasks = 0;
      }
    }

    if (line.hasOption(DELETE_SOURCE_OPTION)) {
      deleteSource = true;
    }

    if (line.hasOption(TEXT_FILE_INPUT_FORMAT)) {
      textFileFormat = true;
    }
  } catch (ParseException pe) {
    System.err.println(pe.getMessage());
    formatter.printHelp(FileMerger.class.getName(), options);
    // Invalid arguments must not be reported as success.
    return -1;
  } catch (NumberFormatException nfe) {
    System.err.println(nfe.getMessage());
    return -1;
  }

  try {
    merge(configuration, inputPath, outputPath, mapperTasks, textFileFormat, deleteSource);
  } catch (InstantiationException ie) {
    ie.printStackTrace();
    // The merge did not complete; surface the failure to the caller.
    return -1;
  } catch (IllegalAccessException iae) {
    iae.printStackTrace();
    return -1;
  }
  return 0;
}
use of org.apache.commons.cli.CommandLine in project databus by linkedin.
the class BootstrapConfigBase method loadConfigProperties.
/**
 * Loads the bootstrap DB configuration from the properties file named on the command line.
 *
 * @param args command-line arguments; only the bootstrap DB properties option is recognized
 * @return the properties read from the given file, or {@code null} when the option is absent
 * @throws IOException if the properties file cannot be opened or read
 * @throws RuntimeException if the command line cannot be parsed
 */
@SuppressWarnings("static-access")
public static Properties loadConfigProperties(String[] args) throws IOException {
  CommandLineParser argParser = new GnuParser();

  Options cliOptions = new Options();
  cliOptions.addOption(
      OptionBuilder.withLongOpt(BOOTSTRAP_DB_PROPS_OPT_LONG_NAME)
          .withDescription("Bootstrap producer properties to use")
          .hasArg()
          .withArgName("property_file")
          .create(BOOTSTRAP_DB_PROP_OPT_CHAR));

  CommandLine parsed;
  try {
    parsed = argParser.parse(cliOptions, args);
  } catch (ParseException pe) {
    throw new RuntimeException("BootstrapConfig: failed to parse command-line options.", pe);
  }

  // No properties file given: nothing is loaded and the caller receives null.
  if (!parsed.hasOption(BOOTSTRAP_DB_PROP_OPT_CHAR)) {
    LOG.info("Using system properties for bootstrap DB config");
    return null;
  }

  String propsPath = parsed.getOptionValue(BOOTSTRAP_DB_PROP_OPT_CHAR);
  LOG.info("Loading bootstrap DB config from properties file " + propsPath);

  Properties loaded = new Properties();
  FileInputStream input = new FileInputStream(propsPath);
  try {
    loaded.load(input);
  } finally {
    input.close();
  }
  return loaded;
}
use of org.apache.commons.cli.CommandLine in project databus by linkedin.
the class BootstrapAvroFileSeederMain method parseArgs.
/**
 * Parses the seeder's command line, configures logging, and loads the bootstrap DB
 * properties into the class's static configuration fields.
 *
 * Logging is set up first (from the given log4j properties file, or a console appender at
 * INFO level otherwise). If help is requested, the help text is printed and the JVM exits
 * with status 0. Otherwise both the sources config and the bootstrap DB properties options
 * must be present.
 *
 * @param args command-line arguments
 * @throws IOException if the bootstrap DB properties file cannot be opened or read
 * @throws RuntimeException if the command line cannot be parsed or a required option is
 *         missing
 */
@SuppressWarnings("static-access")
public static void parseArgs(String[] args) throws IOException {
  CommandLineParser argParser = new GnuParser();

  Options cliOptions = new Options();
  cliOptions.addOption(
      OptionBuilder.withLongOpt(HELP_OPT_LONG_NAME)
          .withDescription("Help screen")
          .create(HELP_OPT_CHAR));
  cliOptions.addOption(
      OptionBuilder.withLongOpt(PHYSICAL_CONFIG_OPT_LONG_NAME)
          .withDescription("Bootstrap producer properties to use")
          .hasArg()
          .withArgName("property_file")
          .create(PHYSICAL_CONFIG_OPT_CHAR));
  cliOptions.addOption(
      OptionBuilder.withLongOpt(BOOTSTRAP_DB_PROPS_OPT_LONG_NAME)
          .withDescription("Bootstrap producer properties to use")
          .hasArg()
          .withArgName("property_file")
          .create(BOOTSTRAP_DB_PROP_OPT_CHAR));
  cliOptions.addOption(
      OptionBuilder.withLongOpt(LOG4J_PROPS_OPT_LONG_NAME)
          .withDescription("Log4j properties to use")
          .hasArg()
          .withArgName("property_file")
          .create(LOG4J_PROPS_OPT_CHAR));

  CommandLine parsed;
  try {
    parsed = argParser.parse(cliOptions, args);
  } catch (ParseException pe) {
    LOG.fatal("Bootstrap Physical Config: failed to parse command-line options.", pe);
    throw new RuntimeException("Bootstrap Physical Config: failed to parse command-line options.", pe);
  }

  // Configure logging before anything else is reported.
  if (parsed.hasOption(LOG4J_PROPS_OPT_CHAR)) {
    String log4jConfigPath = parsed.getOptionValue(LOG4J_PROPS_OPT_CHAR);
    PropertyConfigurator.configure(log4jConfigPath);
    LOG.info("Using custom logging settings from file " + log4jConfigPath);
  } else {
    ConsoleAppender consoleAppender =
        new ConsoleAppender(new PatternLayout("%d{ISO8601} +%r [%t] (%p) {%c} %m%n"));
    Logger root = Logger.getRootLogger();
    root.removeAllAppenders();
    root.addAppender(consoleAppender);
    // INFO is the default log level.
    root.setLevel(Level.INFO);
    LOG.info("Using default logging settings. Log Level is :" + Logger.getRootLogger().getLevel());
  }

  if (parsed.hasOption(HELP_OPT_CHAR)) {
    printCliHelp(cliOptions);
    System.exit(0);
  }

  if (!parsed.hasOption(PHYSICAL_CONFIG_OPT_CHAR)) {
    throw new RuntimeException("Sources Config is not provided; use --help for usage");
  }
  if (!parsed.hasOption(BOOTSTRAP_DB_PROP_OPT_CHAR)) {
    throw new RuntimeException("Bootstrap config is not provided; use --help for usage");
  }

  _sSourcesConfigFile = parsed.getOptionValue(PHYSICAL_CONFIG_OPT_CHAR);

  String dbPropsPath = parsed.getOptionValue(BOOTSTRAP_DB_PROP_OPT_CHAR);
  LOG.info("Loading bootstrap DB config from properties file " + dbPropsPath);

  // The static field is assigned before the file is opened, as in the original ordering.
  _sBootstrapConfigProps = new Properties();
  FileInputStream dbPropsStream = new FileInputStream(dbPropsPath);
  try {
    _sBootstrapConfigProps.load(dbPropsStream);
  } finally {
    dbPropsStream.close();
  }
}
Aggregations