Search in sources :

Example 41 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project hive by apache.

the class StoreComplex method main.

/**
 * Configures and runs the "storecomplex" job.
 *
 * <p>Input is set up through {@code HCatInputFormat} for table
 * {@code COMPLEX_TABLE_NAME} in database "default"; output is set up through
 * {@code HCatOutputFormat} for table
 * {@code COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME}, reusing the input table
 * schema. The process exits with status 0 when the job succeeds and 1 otherwise.
 *
 * <p>After the Hadoop generic options are stripped, exactly one positional
 * argument is required (it is validated but not otherwise read here), plus an
 * optional {@code -libjars <value>} pair whose value is copied to the
 * "tmpjars" configuration key.
 *
 * @param args raw command-line arguments
 * @throws Exception propagated from option parsing, job setup or job execution
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Sized to hold every remaining token so surplus positional arguments are
    // counted and rejected instead of overflowing a one-element array.
    String[] otherArgs = new String[args.length];
    int otherArgCount = 0;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-libjars")) {
            if (i + 1 >= args.length) {
                // "-libjars" was given without a value.
                // NOTE(review): usage() is defined outside this block; the
                // return covers the case where it does not terminate the JVM.
                usage();
                return;
            }
            // generic options parser doesn't seem to work!
            conf.set("tmpjars", args[i + 1]);
            // skip it , the for loop will skip its value
            i = i + 1;
        } else {
            otherArgs[otherArgCount++] = args[i];
        }
    }
    // Check how many positional arguments were actually collected; the array
    // length itself says nothing about that.
    if (otherArgCount != 1) {
        usage();
        return;
    }
    String dbName = "default";
    String tableName = COMPLEX_TABLE_NAME;
    String outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME;
    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
    // test with null or empty randomly
    if (new Random().nextInt(2) == 0) {
        System.err.println("INFO: output partition keys set to null for writing");
        outputPartitionKvps = null;
    }
    // Forward the metastore principal from the system properties, when set.
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null) {
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    }
    Job job = new Job(conf, "storecomplex");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, tableName);
    // initialize HCatOutputFormat
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
    // The output schema is taken verbatim from the input table.
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    HCatOutputFormat.setSchema(job, s);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setJarByClass(StoreComplex.class);
    job.setMapperClass(ComplexMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HashMap(java.util.HashMap) Job(org.apache.hadoop.mapreduce.Job) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 42 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

the class MaxmindDbEnrichmentLoader method main.

/**
 * Command-line entry point: removes the Hadoop generic options from
 * {@code argv}, parses what is left with {@code GeoEnrichmentOptions}, and
 * hands the parsed command line to a new loader instance.
 *
 * @param argv raw command-line arguments
 * @throws IOException propagated from the calls made here
 */
public static void main(String... argv) throws IOException {
    // Only the arguments left over after generic-option handling are parsed.
    final String[] remaining = new GenericOptionsParser(argv).getRemainingArgs();
    final CommandLine parsed = GeoEnrichmentOptions.parse(new PosixParser(), remaining);
    new MaxmindDbEnrichmentLoader().loadGeoLiteDatabase(parsed);
}
Also used : GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 43 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

the class SimpleEnrichmentFlatFileLoaderIntegrationTest method testArgs.

/**
 * Checks that each value-carrying option passed on the command line can be
 * read back through the matching {@code LoadOptions} constant.
 */
@Test
public void testArgs() throws Exception {
    // Every option and its value share one token; presumably that is why the
    // retrieved values are trimmed before being compared.
    final String[] rawArgs = {
        "-c cf",
        "-t enrichment",
        "-e extractor.json",
        "-n enrichment_config.json",
        "-l log4j",
        "-i input.csv",
        "-p 2",
        "-b 128",
        "-q"
    };
    final String[] remaining = new GenericOptionsParser(config, rawArgs).getRemainingArgs();
    final CommandLine parsed = LoadOptions.parse(new PosixParser(), remaining);

    assertEquals(extractorJson, LoadOptions.EXTRACTOR_CONFIG.get(parsed).trim());
    assertEquals(cf, LoadOptions.HBASE_CF.get(parsed).trim());
    assertEquals(tableName, LoadOptions.HBASE_TABLE.get(parsed).trim());
    assertEquals(enrichmentJson, LoadOptions.ENRICHMENT_CONFIG.get(parsed).trim());
    assertEquals(csvFile, LoadOptions.INPUT.get(parsed).trim());
    assertEquals(log4jProperty, LoadOptions.LOG4J_PROPERTIES.get(parsed).trim());
    assertEquals("2", LoadOptions.NUM_THREADS.get(parsed).trim());
    assertEquals("128", LoadOptions.BATCH_SIZE.get(parsed).trim());
}
Also used : CommandLine(org.apache.commons.cli.CommandLine) PosixParser(org.apache.commons.cli.PosixParser) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser) Test(org.junit.jupiter.api.Test)

Example 44 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

the class MaxmindDbEnrichmentLoaderTest method testCommandLineLongOpts.

/**
 * Checks that options supplied in their long ({@code --name}) form are parsed
 * and can be read back through the matching {@code GeoEnrichmentOptions}
 * constant.
 */
@Test
public void testCommandLineLongOpts() throws Exception {
    // All options use the long spelling so the test exercises what its name
    // promises; the remote ASN directory was previously passed as "-ra".
    String[] argv = {
        "--geo_url", "testGeoUrl",
        "--remote_dir", "/test/remoteDir",
        "--remote_asn_dir", "/test/remoteDir",
        "--tmp_dir", "/test/tmpDir",
        "--zk_quorum", "test:2181"
    };
    String[] otherArgs = new GenericOptionsParser(argv).getRemainingArgs();
    CommandLine cli = MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.parse(new PosixParser(), otherArgs);
    assertEquals("testGeoUrl", MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.GEO_URL.get(cli).trim());
    assertEquals("/test/remoteDir", MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.REMOTE_GEO_DIR.get(cli).trim());
    assertEquals("/test/tmpDir", MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.TMP_DIR.get(cli).trim());
    assertEquals("test:2181", MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.ZK_QUORUM.get(cli).trim());
}
Also used : CommandLine(org.apache.commons.cli.CommandLine) PosixParser(org.apache.commons.cli.PosixParser) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser) Test(org.junit.jupiter.api.Test)

Example 45 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project metron by apache.

the class MaxmindDbEnrichmentLoaderTest method testLoadGeoIpDatabase.

/**
 * Writes a small plain-text file, gzips it next to the original inside
 * {@code remoteDir}, runs the mock loader against a {@code file://} URL for
 * the archive, and asserts that a file with the archive's name exists under
 * {@code remoteDir} afterwards.
 */
@Test
public void testLoadGeoIpDatabase() throws Exception {
    final String remotePath = remoteDir.getAbsolutePath();

    // Build the fixture: a plain file and its gzip-compressed counterpart.
    final File plainFile = new File(remotePath + "/MaxmindDbEnrichmentLoaderTest.mmdb");
    TestUtils.write(plainFile, "hello world");
    final File gzFile = new File(remotePath + "/MaxmindDbEnrichmentLoaderTest.mmdb.gz");
    CompressionStrategies.GZIP.compress(plainFile, gzFile);

    final String[] rawArgs = {
        "--geo_url", "file://" + gzFile.getAbsolutePath(),
        "--remote_dir", remotePath,
        "--remote_asn_dir", remotePath,
        "--tmp_dir", tmpDir.getAbsolutePath(),
        "--zk_quorum", "test:2181"
    };
    final String[] remaining = new GenericOptionsParser(rawArgs).getRemainingArgs();
    final CommandLine parsed =
        MaxmindDbEnrichmentLoader.GeoEnrichmentOptions.parse(new PosixParser(), remaining);

    final MaxmindDbEnrichmentLoader loader = new MockMaxmindDbEnrichmentLoader();
    loader.loadGeoLiteDatabase(parsed);

    // Look the file up through the Hadoop FileSystem built from a default configuration.
    final FileSystem fileSystem = FileSystem.get(new Configuration());
    final Path expected = new Path(remoteDir + "/" + gzFile.getName());
    assertTrue(fileSystem.exists(expected));
}
Also used : Path(org.apache.hadoop.fs.Path) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.hadoop.conf.Configuration) PosixParser(org.apache.commons.cli.PosixParser) FileSystem(org.apache.hadoop.fs.FileSystem) File(java.io.File) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser) Test(org.junit.jupiter.api.Test)

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser) 102
Configuration (org.apache.hadoop.conf.Configuration) 72
Path (org.apache.hadoop.fs.Path) 38
Job (org.apache.hadoop.mapreduce.Job) 35
CommandLine (org.apache.commons.cli.CommandLine) 18
IOException (java.io.IOException) 15
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 11
PosixParser (org.apache.commons.cli.PosixParser) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 10
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema) 10
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 9
ParseException (org.apache.commons.cli.ParseException) 7
Test (org.junit.jupiter.api.Test) 7
ArrayList (java.util.ArrayList) 6
Options (org.apache.commons.cli.Options) 6
JobConf (org.apache.hadoop.mapred.JobConf) 6
File (java.io.File) 5
HashMap (java.util.HashMap) 5
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler) 5
TezConfiguration (org.apache.tez.dag.api.TezConfiguration) 5