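GenericOptionsParser is Hadoop's standard mechanism for separating framework arguments from application arguments: it applies the generic options (-D, -conf, -fs, -jt, -files, -libjars, -archives) to the supplied Configuration, and getRemainingArgs() returns whatever is left over for the tool itself. All of the examples below follow that pattern. A minimal standalone sketch (the class name is illustrative, not taken from any of the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptionsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Generic options such as "-D mapreduce.job.reduces=0" are applied to
        // conf; only the application's own arguments remain afterwards.
        String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("remaining args: " + String.join(" ", remaining));
        System.out.println("mapreduce.job.reduces = " + conf.get("mapreduce.job.reduces"));
    }
}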

Example 76 with GenericOptionsParser

Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.

The class ReadRC, method run.

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0]; // read from args but not used in this method
    String tableName = args[1];
    String outputDir = args[2];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    // Job(Configuration, String) is deprecated in current Hadoop;
    // Job.getInstance(conf, "ReadRC") is the preferred replacement.
    Job job = new Job(conf, "ReadRC");
    HCatInputFormat.setInput(job, dbName, tableName);
    // set the input and output format classes for the job
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setJarByClass(ReadRC.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return (job.waitForCompletion(true) ? 0 : 1);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
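The Map class registered via job.setMapperClass(Map.class) above is a nested class not shown on this page. A minimal sketch consistent with the job's declared key/value types (the mapper body is illustrative, not the actual Hive test code):

// Assumed imports: java.io.IOException, org.apache.hadoop.io.IntWritable,
// org.apache.hadoop.io.WritableComparable, org.apache.hadoop.mapreduce.Mapper,
// org.apache.hive.hcatalog.data.HCatRecord.
public static class Map extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord> {
    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
            throws IOException, InterruptedException {
        // Pass each HCatRecord through unchanged; with zero reduce tasks,
        // TextOutputFormat writes "key<TAB>value" lines to the output directory.
        context.write(new IntWritable(0), value);
    }
}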

Example 77 with GenericOptionsParser

Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.

The class ReadWrite, method run.

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0];
    String inputTableName = args[1];
    String outputTableName = args[2];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "ReadWrite");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    // set the input format; HCatOutputFormat is configured further below
    job.setInputFormatClass(HCatInputFormat.class);
    job.setJarByClass(ReadWrite.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing: " + s);
    HCatOutputFormat.setSchema(job, s);
    job.setOutputFormatClass(HCatOutputFormat.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
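A hypothetical invocation (the jar, metastore URI, and table names are placeholders). Generic options come first and are stripped by GenericOptionsParser; the three remaining positional arguments are consumed as serverUri, inputTableName, and outputTableName:

hadoop jar hcatalog-examples.jar ReadWrite \
    -D hive.metastore.uris=thrift://metastore-host:9083 \
    thrift://metastore-host:9083 source_table target_table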

Example 78 with GenericOptionsParser

Use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.

The class DelegationTokenTool, method readArgs.

private void readArgs(String[] args) throws Exception {
    args = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    Options options = new Options();
    options.addOption(new Option("confLocation", true, "Location of HCat/Hive Server's hive-site."));
    options.addOption(new Option("delete", false, "Delete delegation token."));
    options.addOption(new Option("list", false, "List delegation tokens."));
    options.addOption(new Option("olderThan", true, "Filter for token's issue-date. (e.g. 3d, 1h or 4m)."));
    options.addOption(new Option("expired", false, "Select expired delegation tokens for listing/deletion."));
    options.addOption(new Option("dryRun", false, "Don't actually delete delegation tokens."));
    options.addOption(new Option("batchSize", true, "Number of tokens to drop between sleep intervals."));
    options.addOption(new Option("sleepTime", true, "Sleep-time in seconds, between batches of dropped delegation tokens."));
    options.addOption(new Option("serverMode", true, "The service from which to read delegation tokens. Should be either of [METASTORE, HIVESERVER2]."));
    // stopAtNonOption == false: fail fast on any unrecognized option.
    CommandLine commandLine = new GnuParser().parse(options, args, false);
    if (commandLine.hasOption("confLocation")) {
        confLocation = commandLine.getOptionValue("confLocation");
    }
    if (commandLine.hasOption("list")) {
        opType = OpType.LIST;
    } else if (commandLine.hasOption("delete")) {
        opType = OpType.DELETE;
    } else {
        throw new IllegalArgumentException("Operation must be either list or delete!");
    }
    isDryRun = (commandLine.hasOption("dryRun"));
    if (commandLine.hasOption("expired")) {
        LOG.info("Working on expired delegation tokens!");
        timeLimitMillis = System.currentTimeMillis();
        selectForDeletion = new Predicate<DelegationTokenIdentifier>() {

            public boolean apply(DelegationTokenIdentifier input) {
                return timeLimitMillis > input.getMaxDate();
            }
        };
    } else if (commandLine.hasOption("olderThan")) {
        String olderThanLimitString = commandLine.getOptionValue("olderThan");
        // Note: the multipliers use a long literal (24L/60L) so the product
        // does not overflow int for large arguments (e.g. "-olderThan 30d").
        switch (olderThanLimitString.charAt(olderThanLimitString.length() - 1)) {
            case 'd':
            case 'D':
                timeLimitMillis = System.currentTimeMillis() - 24L * 60 * 60 * 1000 * Integer.parseInt(olderThanLimitString.substring(0, olderThanLimitString.length() - 1));
                break;
            case 'h':
            case 'H':
                timeLimitMillis = System.currentTimeMillis() - 60L * 60 * 1000 * Integer.parseInt(olderThanLimitString.substring(0, olderThanLimitString.length() - 1));
                break;
            case 'm':
            case 'M':
                timeLimitMillis = System.currentTimeMillis() - 60L * 1000 * Integer.parseInt(olderThanLimitString.substring(0, olderThanLimitString.length() - 1));
                break;
            default:
                throw new IllegalArgumentException("Unsupported time-limit: " + olderThanLimitString);
        }
        LOG.info("Working on delegation tokens older than current-time (" + timeLimitMillis + ").");
        selectForDeletion = new Predicate<DelegationTokenIdentifier>() {

            public boolean apply(DelegationTokenIdentifier input) {
                return timeLimitMillis > input.getIssueDate();
            }
        };
    } else {
        // Neither "expired" nor "olderThan" criteria selected. This better not be an attempt to delete tokens.
        if (opType == OpType.DELETE) {
            throw new IllegalArgumentException("Attempting to delete tokens. " + "Specify deletion criteria (either expired or time-range).");
        }
    }
    if (commandLine.hasOption("batchSize")) {
        String batchSizeString = commandLine.getOptionValue("batchSize");
        batchSize = Integer.parseInt(batchSizeString);
        if (batchSize < 1) {
            LOG.warn("Invalid batch-size! (" + batchSize + ") Resetting to defaults.");
            batchSize = BATCH_SIZE_DEFAULT;
        }
        LOG.info("Batch-size for drop == " + batchSize);
    }
    if (commandLine.hasOption("sleepTime")) {
        String sleepTimeString = commandLine.getOptionValue("sleepTime");
        sleepTimeMillis = 1000 * Integer.parseInt(sleepTimeString);
        if (sleepTimeMillis <= 0) {
            LOG.warn("Invalid sleep-time! (" + sleepTimeMillis + ") Resetting to defaults.");
            sleepTimeMillis = SLEEP_TIME_MILLIS_DEFAULT;
        }
        LOG.info("Sleep between drop-batches: " + sleepTimeMillis + " milliseconds.");
    }
    if (commandLine.hasOption("serverMode")) {
        String serverModeString = commandLine.getOptionValue("serverMode").toLowerCase();
        switch(serverModeString) {
            case "metastore":
                serverMode = HadoopThriftAuthBridge.Server.ServerMode.METASTORE;
                break;
            case "hiveserver2":
                serverMode = HadoopThriftAuthBridge.Server.ServerMode.HIVESERVER2;
                break;
            default:
                throw new IllegalArgumentException("Invalid value for serverMode (" + serverModeString + "). " + "Should be either \"METASTORE\" or \"HIVESERVER2\".");
        }
    }
    LOG.info("Running with serverMode == " + serverMode);
}
Also used: Options (org.apache.commons.cli.Options), CommandLine (org.apache.commons.cli.CommandLine), GnuParser (org.apache.commons.cli.GnuParser), Option (org.apache.commons.cli.Option), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser), Predicate (com.google.common.base.Predicate)
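Hypothetical invocations assembled from the Options defined above (the jar and class names are placeholders):

# List metastore delegation tokens issued more than 3 days ago.
hadoop jar hive-hcatalog.jar DelegationTokenTool \
    -confLocation /etc/hive/conf/hive-site.xml \
    -list -olderThan 3d -serverMode METASTORE

# Dry-run deletion of expired tokens, 50 per batch, sleeping 5 seconds between batches.
hadoop jar hive-hcatalog.jar DelegationTokenTool \
    -confLocation /etc/hive/conf/hive-site.xml \
    -delete -expired -dryRun -batchSize 50 -sleepTime 5 -serverMode METASTORE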

Example 79 with GenericOptionsParser

Use of org.apache.hadoop.util.GenericOptionsParser in project nutch by apache.

The class ParseText, method main.

public static void main(String[] argv) throws Exception {
    String usage = "ParseText (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage: " + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    try (FileSystem fs = FileSystem.get(conf)) {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        String filename = new Path(segment, ParseText.DIR_NAME).toString();
        ParseText parseText = new ParseText();
        ArrayFile.Reader parseTexts = new ArrayFile.Reader(fs, filename, conf);
        parseTexts.get(recno, parseText);
        System.out.println("Retrieved " + recno + " from file " + filename);
        System.out.println(parseText);
        parseTexts.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), NutchConfiguration (org.apache.nutch.util.NutchConfiguration), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayFile (org.apache.hadoop.io.ArrayFile), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
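A hypothetical invocation (the record number and segment path are placeholders). Note that the argv.length < 3 check counts the raw arguments, so at least one option must precede the two positional arguments (recno and segment); the generic -fs option handled by GenericOptionsParser plays the role of the legacy -local/-dfs flags in the usage string:

hadoop jar apache-nutch.jar org.apache.nutch.parse.ParseText \
    -fs hdfs://namenode:8020 5 crawl/segments/20240101123456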

Example 80 with GenericOptionsParser

Use of org.apache.hadoop.util.GenericOptionsParser in project nutch by apache.

The class ParseData, method main.

public static void main(String[] argv) throws Exception {
    String usage = "ParseData (-local | -dfs <namenode:port>) recno segment";
    if (argv.length < 3) {
        System.out.println("usage: " + usage);
        return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
    String[] remainingArgs = parser.getRemainingArgs();
    try (FileSystem fs = FileSystem.get(conf)) {
        int recno = Integer.parseInt(remainingArgs[0]);
        String segment = remainingArgs[1];
        Path file = new Path(segment, DIR_NAME);
        System.out.println("Reading from file: " + file);
        ArrayFile.Reader parses = new ArrayFile.Reader(fs, file.toString(), conf);
        ParseData parseDatum = new ParseData();
        parses.get(recno, parseDatum);
        System.out.println("Retrieved " + recno + " from file " + file);
        System.out.println(parseDatum);
        parses.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), NutchConfiguration (org.apache.nutch.util.NutchConfiguration), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayFile (org.apache.hadoop.io.ArrayFile), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
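One small variation worth noting (a sketch, not the Nutch source): in recent Hadoop versions ArrayFile.Reader inherits java.io.Closeable from MapFile.Reader, so the reader can join the try-with-resources block and be closed even when get() throws:

try (FileSystem fs = FileSystem.get(conf);
     ArrayFile.Reader parses = new ArrayFile.Reader(fs, new Path(segment, DIR_NAME).toString(), conf)) {
    ParseData parseDatum = new ParseData();
    parses.get(recno, parseDatum);
    System.out.println(parseDatum);
}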

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 102 usages
Configuration (org.apache.hadoop.conf.Configuration): 72 usages
Path (org.apache.hadoop.fs.Path): 38 usages
Job (org.apache.hadoop.mapreduce.Job): 35 usages
CommandLine (org.apache.commons.cli.CommandLine): 18 usages
IOException (java.io.IOException): 15 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 11 usages
PosixParser (org.apache.commons.cli.PosixParser): 10 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 10 usages
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 9 usages
ParseException (org.apache.commons.cli.ParseException): 7 usages
Test (org.junit.jupiter.api.Test): 7 usages
ArrayList (java.util.ArrayList): 6 usages
Options (org.apache.commons.cli.Options): 6 usages
JobConf (org.apache.hadoop.mapred.JobConf): 6 usages
File (java.io.File): 5 usages
HashMap (java.util.HashMap): 5 usages
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler): 5 usages
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 5 usages