Search in sources :

Example 36 with CommandLine

use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.

the class DocumentForwardIndexHttpServer method run.

/**
 * Parses the command line, submits a map-only job running {@code MyMapper}, and then
 * waits for a rendezvous file to appear so the server's host name can be reported.
 *
 * <p>The rendezvous file lives under {@code /tmp/} with a random integer name; its path
 * is handed to the job through the {@code TMP_KEY} configuration entry. Presumably the
 * mapper writes its host name there once the server is up (verify against MyMapper).
 *
 * @param args command-line arguments; the index and docno-mapping options are both required
 * @return 0 after the host name has been read and logged; -1 if the arguments cannot be
 *         parsed or a required option is missing
 * @throws Exception if file system access or job submission fails, or if the wait is interrupted
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) forward index path").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) DocnoMapping data path").create(MAPPING_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INDEX_OPTION) || !cmdline.hasOption(MAPPING_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String indexFile = cmdline.getOptionValue(INDEX_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_OPTION);
    LOG.info("Launching DocumentForwardIndexHttpServer");
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - docno mapping data file: " + mappingFile);
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Random rand = new Random();
    int r = rand.nextInt();
    // This tmp file acts as a rendezvous point between this driver and the submitted job.
    Path tmpPath = new Path("/tmp/" + r);
    // Remove any stale file with the same name so the wait loop below only sees a fresh one.
    if (fs.exists(tmpPath)) {
        fs.delete(tmpPath, true);
    }
    Job job = new Job(conf, DocumentForwardIndexHttpServer.class.getSimpleName());
    job.setJarByClass(DocumentForwardIndexHttpServer.class);
    job.getConfiguration().set("mapred.child.java.opts", "-Xmx1024m");
    job.getConfiguration().set(INDEX_KEY, indexFile);
    job.getConfiguration().set(DOCNO_MAPPING_KEY, mappingFile);
    job.getConfiguration().set(TMP_KEY, tmpPath.toString());
    // Map-only job with no real input or output.
    job.setNumReduceTasks(0);
    job.setInputFormatClass(NullInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    // submit() returns immediately; the rendezvous file is polled instead of waiting for completion.
    job.submit();
    LOG.info("Waiting for server to start up...");
    while (!fs.exists(tmpPath)) {
        Thread.sleep(50000);
        LOG.info("...");
    }
    String host;
    FSDataInputStream in = fs.open(tmpPath);
    try {
        host = in.readUTF();
    } finally {
        // Close even when readUTF() fails (e.g. the file exists but is not fully written yet).
        in.close();
    }
    LOG.info("host: " + host);
    LOG.info("port: 8888");
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)

Example 37 with CommandLine

use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.

the class HadoopAlign method main.

/**
 * Command-line entry point: parses the alignment options, builds a
 * {@code HadoopAlignConfig} from them and passes it to {@code doAlignment}.
 *
 * <p>On any invalid command line (unparseable options, non-integer numeric values,
 * negative iteration counts, or zero iterations for both models) an error is written
 * to {@code System.err}, usage is printed, and the method returns without running.
 *
 * @param args command-line arguments
 * @throws IOException declared by the original signature; presumably propagated from
 *         {@code doAlignment} (verify against that method)
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
    options = new Options();
    options.addOption(OptionBuilder.withDescription("path to XML-formatted parallel corpus").withArgName("path").hasArg().isRequired().create(INPUT_OPTION));
    options.addOption(OptionBuilder.withDescription("path to work/output directory on HDFS").withArgName("path").hasArg().isRequired().create(WORK_OPTION));
    options.addOption(OptionBuilder.withDescription("two-letter collection language code").withArgName("en|de|fr|zh|es|ar|tr").hasArg().isRequired().create(FLANG_OPTION));
    options.addOption(OptionBuilder.withDescription("two-letter collection language code").withArgName("en|de|fr|zh|es|ar|tr").hasArg().isRequired().create(ELANG_OPTION));
    options.addOption(OptionBuilder.withDescription("number of IBM Model 1 iterations").withArgName("positive integer").hasArg().create(MODEL1_OPTION));
    options.addOption(OptionBuilder.withDescription("number of HMM iterations").withArgName("positive integer").hasArg().create(HMM_OPTION));
    options.addOption(OptionBuilder.withDescription("truncate/stem text or not").create(TRUNCATE_OPTION));
    options.addOption(OptionBuilder.withDescription("number of reducers").withArgName("positive integer").hasArg().create(REDUCE_OPTION));
    options.addOption(OptionBuilder.withDescription("Hadoop option to load external jars").withArgName("jar packages").hasArg().create(LIBJARS_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        printUsage();
        System.err.println("Error parsing command line: " + exp.getMessage());
        return;
    }
    String bitextPath = cmdline.getOptionValue(INPUT_OPTION);
    String workDir = cmdline.getOptionValue(WORK_OPTION);
    String srcLang = cmdline.getOptionValue(FLANG_OPTION);
    String trgLang = cmdline.getOptionValue(ELANG_OPTION);
    int model1Iters;
    int hmmIters;
    int numReducers;
    try {
        // Absent iteration options default to 0; absent reducer count defaults to 50.
        model1Iters = cmdline.hasOption(MODEL1_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MODEL1_OPTION)) : 0;
        hmmIters = cmdline.hasOption(HMM_OPTION) ? Integer.parseInt(cmdline.getOptionValue(HMM_OPTION)) : 0;
        numReducers = cmdline.hasOption(REDUCE_OPTION) ? Integer.parseInt(cmdline.getOptionValue(REDUCE_OPTION)) : 50;
    } catch (NumberFormatException nfe) {
        // Report a malformed number like any other bad argument instead of crashing with a stack trace.
        System.err.println("Error parsing command line: expected an integer value (" + nfe.getMessage() + ")");
        printUsage();
        return;
    }
    // Negative counts are rejected explicitly: summing them could cancel out to zero or slip past a zero check.
    if (model1Iters < 0 || hmmIters < 0 || (model1Iters == 0 && hmmIters == 0)) {
        System.err.println("Please enter a positive number of iterations for either Model 1 or HMM");
        printUsage();
        return;
    }
    boolean isTruncate = cmdline.hasOption(TRUNCATE_OPTION);
    HadoopAlignConfig hac = new HadoopAlignConfig(workDir, trgLang, srcLang, bitextPath, model1Iters, hmmIters,
        true, // use null word
        false, // use variational bayes
        isTruncate, // use word truncation
        0.00f); // alpha
    hac.setHMMHomogeneous(false);
    hac.set("mapreduce.map.memory.mb", "2048");
    hac.set("mapreduce.map.java.opts", "-Xmx2048m");
    hac.set("mapreduce.reduce.memory.mb", "2048");
    hac.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    hac.setHMMp0(0.2);
    hac.setMaxSentLen(15);
    doAlignment(50, numReducers, hac);
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) GnuParser(org.apache.commons.cli.GnuParser) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException)

Example 38 with CommandLine

use of org.apache.commons.cli.CommandLine in project Cloud9 by lintool.

the class FileMerger method run.

/**
 * Parses the merge options from {@code args} and invokes {@code merge} with them.
 *
 * <p>The help option prints usage and exits the JVM with status 0. A missing input or
 * output option, or a non-integer mapper count, prints the problem and exits the JVM
 * with a non-zero status. A non-positive mapper count is reset to 0 with a log message.
 *
 * <p>TODO: add in hadoop configuration
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @return 0 when {@code merge} completes; -1 when it fails with a reflection error
 * @throws IOException declared by the original signature; presumably propagated from
 *         {@code merge} or option parsing (verify against those callees)
 */
@Override
public int run(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(HELP_OPTION, false, "print the help message");
    options.addOption(OptionBuilder.withArgName(PATH_INDICATOR).hasArg().withDescription("input file or directory").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(PATH_INDICATOR).hasArg().withDescription("output file").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName(INTEGER_INDICATOR).hasArg().withDescription("number of mappers (default to 0 and hence local merge mode, set to positive value to enable cluster merge mode)").create(MAPPER_OPTION));
    options.addOption(OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator().withDescription("assign value for given property").create("D"));
    // NOTE(review): this flag sets textFileFormat below, yet its description mentions
    // "sequence format" - confirm which wording is intended.
    options.addOption(TEXT_FILE_INPUT_FORMAT, false, "input file in sequence format");
    options.addOption(DELETE_SOURCE_OPTION, false, "delete sources after merging");
    int mapperTasks = 0;
    boolean deleteSource = DELETE_SOURCE;
    boolean textFileFormat = TEXT_FILE_INPUT;
    String inputPath = "";
    String outputPath = "";
    GenericOptionsParser genericOptionsParser = new GenericOptionsParser(args);
    Configuration configuration = genericOptionsParser.getConfiguration();
    CommandLineParser parser = new GnuParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine line = parser.parse(options, args);
        if (line.hasOption(HELP_OPTION)) {
            formatter.printHelp(FileMerger.class.getName(), options);
            System.exit(0);
        }
        if (line.hasOption(INPUT_OPTION)) {
            inputPath = line.getOptionValue(INPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + INPUT_OPTION + " not initialized...");
        }
        if (line.hasOption(OUTPUT_OPTION)) {
            outputPath = line.getOptionValue(OUTPUT_OPTION);
        } else {
            throw new ParseException("Parsing failed due to " + OUTPUT_OPTION + " not initialized...");
        }
        if (line.hasOption(MAPPER_OPTION)) {
            mapperTasks = Integer.parseInt(line.getOptionValue(MAPPER_OPTION));
            if (mapperTasks <= 0) {
                sLogger.info("Warning: " + MAPPER_OPTION + " is not positive, merge in local model...");
                mapperTasks = 0;
            }
        }
        if (line.hasOption(DELETE_SOURCE_OPTION)) {
            deleteSource = true;
        }
        if (line.hasOption(TEXT_FILE_INPUT_FORMAT)) {
            textFileFormat = true;
        }
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
        formatter.printHelp(FileMerger.class.getName(), options);
        // Invalid arguments are a failure: exit non-zero so scripts can detect it.
        System.exit(1);
    } catch (NumberFormatException nfe) {
        System.err.println(nfe.getMessage());
        System.exit(1);
    }
    try {
        merge(configuration, inputPath, outputPath, mapperTasks, textFileFormat, deleteSource);
    } catch (InstantiationException ie) {
        ie.printStackTrace();
        // The merge did not complete; do not report success to the caller.
        return -1;
    } catch (IllegalAccessException iae) {
        iae.printStackTrace();
        return -1;
    }
    return 0;
}
Also used : Options(org.apache.commons.cli.Options) Configuration(org.apache.hadoop.conf.Configuration) GnuParser(org.apache.commons.cli.GnuParser) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 39 with CommandLine

use of org.apache.commons.cli.CommandLine in project databus by linkedin.

the class BootstrapConfigBase method loadConfigProperties.

/**
 * Loads the bootstrap DB properties from the file named by the bootstrap DB
 * properties command-line option.
 *
 * @param args command-line arguments to parse
 * @return the properties read from the named file, or {@code null} when the option
 *         was not supplied
 * @throws IOException if the properties file cannot be opened or read
 * @throws RuntimeException if the command line cannot be parsed (wraps the ParseException)
 */
@SuppressWarnings("static-access")
public static Properties loadConfigProperties(String[] args) throws IOException {
    Options cliOptions = new Options();
    cliOptions.addOption(OptionBuilder.withLongOpt(BOOTSTRAP_DB_PROPS_OPT_LONG_NAME).withDescription("Bootstrap producer properties to use").hasArg().withArgName("property_file").create(BOOTSTRAP_DB_PROP_OPT_CHAR));

    CommandLine parsed;
    try {
        parsed = new GnuParser().parse(cliOptions, args);
    } catch (ParseException pe) {
        throw new RuntimeException("BootstrapConfig: failed to parse command-line options.", pe);
    }

    // Without the option there is no file to read; callers receive null.
    if (!parsed.hasOption(BOOTSTRAP_DB_PROP_OPT_CHAR)) {
        LOG.info("Using system properties for bootstrap DB config");
        return null;
    }

    String propertiesPath = parsed.getOptionValue(BOOTSTRAP_DB_PROP_OPT_CHAR);
    LOG.info("Loading bootstrap DB config from properties file " + propertiesPath);
    Properties loaded = new Properties();
    FileInputStream input = new FileInputStream(propertiesPath);
    try {
        loaded.load(input);
    } finally {
        input.close();
    }
    return loaded;
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) GnuParser(org.apache.commons.cli.GnuParser) Option(org.apache.commons.cli.Option) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Properties(java.util.Properties) FileInputStream(java.io.FileInputStream)

Example 40 with CommandLine

use of org.apache.commons.cli.CommandLine in project databus by linkedin.

the class BootstrapAvroFileSeederMain method parseArgs.

/**
 * Parses the seeder's command line, configures log4j, and loads the bootstrap DB
 * properties into the class's static state.
 *
 * <p>Logging is configured first (from the given log4j properties file, or a console
 * appender at INFO level by default). The help option prints usage and exits the JVM
 * with status 0. Otherwise both the sources config option and the bootstrap DB
 * properties option are required: the former is stored in {@code _sSourcesConfigFile},
 * the latter is read into {@code _sBootstrapConfigProps}.
 *
 * @param args command-line arguments
 * @throws IOException if the bootstrap DB properties file cannot be opened or read
 * @throws RuntimeException if the command line cannot be parsed or a required option is missing
 */
@SuppressWarnings("static-access")
public static void parseArgs(String[] args) throws IOException {
    CommandLineParser cliParser = new GnuParser();
    Option helpOption = OptionBuilder.withLongOpt(HELP_OPT_LONG_NAME).withDescription("Help screen").create(HELP_OPT_CHAR);
    // This description used to duplicate the bootstrap DB option's text, which made the help output misleading.
    Option sourcesOption = OptionBuilder.withLongOpt(PHYSICAL_CONFIG_OPT_LONG_NAME).withDescription("Physical sources config file to use").hasArg().withArgName("property_file").create(PHYSICAL_CONFIG_OPT_CHAR);
    Option dbOption = OptionBuilder.withLongOpt(BOOTSTRAP_DB_PROPS_OPT_LONG_NAME).withDescription("Bootstrap producer properties to use").hasArg().withArgName("property_file").create(BOOTSTRAP_DB_PROP_OPT_CHAR);
    Option log4jPropsOption = OptionBuilder.withLongOpt(LOG4J_PROPS_OPT_LONG_NAME).withDescription("Log4j properties to use").hasArg().withArgName("property_file").create(LOG4J_PROPS_OPT_CHAR);
    Options options = new Options();
    options.addOption(helpOption);
    options.addOption(sourcesOption);
    options.addOption(dbOption);
    options.addOption(log4jPropsOption);
    CommandLine cmd = null;
    try {
        cmd = cliParser.parse(options, args);
    } catch (ParseException pe) {
        LOG.fatal("Bootstrap Physical Config: failed to parse command-line options.", pe);
        throw new RuntimeException("Bootstrap Physical Config: failed to parse command-line options.", pe);
    }
    if (cmd.hasOption(LOG4J_PROPS_OPT_CHAR)) {
        String log4jPropFile = cmd.getOptionValue(LOG4J_PROPS_OPT_CHAR);
        PropertyConfigurator.configure(log4jPropFile);
        LOG.info("Using custom logging settings from file " + log4jPropFile);
    } else {
        // No log4j file given: replace all root appenders with a single console appender.
        PatternLayout defaultLayout = new PatternLayout("%d{ISO8601} +%r [%t] (%p) {%c} %m%n");
        ConsoleAppender defaultAppender = new ConsoleAppender(defaultLayout);
        Logger.getRootLogger().removeAllAppenders();
        Logger.getRootLogger().addAppender(defaultAppender);
        //using info as the default log level
        Logger.getRootLogger().setLevel(Level.INFO);
        LOG.info("Using default logging settings. Log Level is :" + Logger.getRootLogger().getLevel());
    }
    // Help is handled before the required-option checks so that --help alone works.
    if (cmd.hasOption(HELP_OPT_CHAR)) {
        printCliHelp(options);
        System.exit(0);
    }
    if (!cmd.hasOption(PHYSICAL_CONFIG_OPT_CHAR))
        throw new RuntimeException("Sources Config is not provided; use --help for usage");
    if (!cmd.hasOption(BOOTSTRAP_DB_PROP_OPT_CHAR))
        throw new RuntimeException("Bootstrap config is not provided; use --help for usage");
    _sSourcesConfigFile = cmd.getOptionValue(PHYSICAL_CONFIG_OPT_CHAR);
    String propFile = cmd.getOptionValue(BOOTSTRAP_DB_PROP_OPT_CHAR);
    LOG.info("Loading bootstrap DB config from properties file " + propFile);
    _sBootstrapConfigProps = new Properties();
    FileInputStream fis = new FileInputStream(propFile);
    try {
        _sBootstrapConfigProps.load(fis);
    } finally {
        fis.close();
    }
}
Also used : ConsoleAppender(org.apache.log4j.ConsoleAppender) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) PatternLayout(org.apache.log4j.PatternLayout) GnuParser(org.apache.commons.cli.GnuParser) Option(org.apache.commons.cli.Option) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Properties(java.util.Properties) FileInputStream(java.io.FileInputStream)

Aggregations

CommandLine (org.apache.commons.cli.CommandLine): 474, Options (org.apache.commons.cli.Options): 293, CommandLineParser (org.apache.commons.cli.CommandLineParser): 275, ParseException (org.apache.commons.cli.ParseException): 260, GnuParser (org.apache.commons.cli.GnuParser): 203, HelpFormatter (org.apache.commons.cli.HelpFormatter): 154, IOException (java.io.IOException): 108, PosixParser (org.apache.commons.cli.PosixParser): 97, File (java.io.File): 84, Option (org.apache.commons.cli.Option): 73, Path (org.apache.hadoop.fs.Path): 59, DefaultParser (org.apache.commons.cli.DefaultParser): 55, Configuration (org.apache.hadoop.conf.Configuration): 39, ArrayList (java.util.ArrayList): 31, Job (org.apache.hadoop.mapreduce.Job): 29, BasicParser (org.apache.commons.cli.BasicParser): 23, FileInputStream (java.io.FileInputStream): 21, Properties (java.util.Properties): 20, FileSystem (org.apache.hadoop.fs.FileSystem): 18, List (java.util.List): 16