Example 26 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project Gaffer by gchq.

the class AddElementsFromHdfsHandler method fetchElements.

private void fetchElements(final AddElementsFromHdfs operation, final AccumuloStore store) throws OperationException {
    final int response;
    try {
        /* Parse any Hadoop arguments passed on the command line and use these to configure the Tool */
        final Configuration configuration = new GenericOptionsParser(operation.getCommandLineArgs()).getConfiguration();
        final AddElementsFromHdfsTool fetchTool = new AddElementsFromHdfsTool(new AccumuloAddElementsFromHdfsJobFactory(configuration), operation, store);
        LOGGER.info("Running FetchElementsFromHdfsTool job");
        response = ToolRunner.run(fetchTool, operation.getCommandLineArgs());
        LOGGER.info("Finished running FetchElementsFromHdfsTool job");
    } catch (final Exception e) {
        LOGGER.error("Failed to fetch elements from HDFS: {}", e.getMessage());
        throw new OperationException("Failed to fetch elements from HDFS", e);
    }
    if (AddElementsFromHdfsTool.SUCCESS_RESPONSE != response) {
        LOGGER.error("Failed to fetch elements from HDFS. Response code was {}", response);
        throw new OperationException("Failed to fetch elements from HDFS. Response code was: " + response);
    }
}
Also used : AccumuloAddElementsFromHdfsJobFactory (uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.factory.AccumuloAddElementsFromHdfsJobFactory), Configuration (org.apache.hadoop.conf.Configuration), AddElementsFromHdfsTool (uk.gov.gchq.gaffer.hdfs.operation.handler.job.tool.AddElementsFromHdfsTool), StoreException (uk.gov.gchq.gaffer.store.StoreException), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), IOException (java.io.IOException), AccumuloException (org.apache.accumulo.core.client.AccumuloException), OperationException (uk.gov.gchq.gaffer.operation.OperationException), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
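
For context, here is a minimal sketch of the standard Tool/ToolRunner pattern the handler above builds on: ToolRunner applies GenericOptionsParser internally, strips generic options such as -D, -files and -libjars into the Configuration, and passes the remaining arguments to run(). The class name MyTool is illustrative, not taken from Gaffer.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MyTool extends Configured implements Tool {

    @Override
    public int run(String[] remainingArgs) throws Exception {
        // getConf() returns the Configuration already populated by
        // GenericOptionsParser with any -D, -files or -libjars options
        Configuration conf = getConf();
        // ... application logic ...
        return 0; // 0 conventionally signals success, as SUCCESS_RESPONSE does above
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic options and forwards the rest to run()
        System.exit(ToolRunner.run(new Configuration(), new MyTool(), args));
    }
}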

Example 27 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class StatsModelProcessor method runCorrMapReduceJob.

private void runCorrMapReduceJob() throws IOException, InterruptedException, ClassNotFoundException {
    SourceType source = this.modelConfig.getDataSet().getSource();
    final Configuration conf = new Configuration();
    String modelConfigPath = ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString();
    String columnConfigPath = ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString();
    // add jars and files to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", modelConfigPath + "," + columnConfigPath });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // too much data must be transferred to the reducers, so lower the slow-start threshold to 0.7 so that
    // reducers start copying map output earlier
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.7"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    boolean isFastCorrelation = Environment.getProperty("shifu.correlation.fast", "false").equalsIgnoreCase(Boolean.TRUE.toString());
    int threads = parseThreadNum();
    conf.setInt("mapreduce.map.cpu.vcores", threads);
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    // honor mapper memory settings passed on the command line, e.g. -Dmapreduce.map.java.opts=-Xmx3000M
    if (System.getProperty("mapreduce.map.memory.mb") == null || System.getProperty("mapreduce.map.java.opts") == null) {
        setMapperMemory(conf, threads, isFastCorrelation);
    } else {
        conf.set("mapreduce.map.memory.mb", System.getProperty("mapreduce.map.memory.mb"));
        conf.set("mapreduce.map.java.opts", System.getProperty("mapreduce.map.java.opts"));
        log.info("Corrrelation map memory is set to {}MB from command line parameters.", System.getProperty("mapreduce.map.memory.mb"));
    }
    @SuppressWarnings("deprecation") Job job = new Job(conf, "Shifu: Correlation Computing Job : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    if (isFastCorrelation) {
        job.setMapperClass(FastCorrelationMultithreadedMapper.class);
        FastCorrelationMultithreadedMapper.setMapperClass(job, FastCorrelationMapper.class);
        FastCorrelationMultithreadedMapper.setNumberOfThreads(job, threads);
    } else {
        job.setMapperClass(CorrelationMultithreadedMapper.class);
        CorrelationMultithreadedMapper.setMapperClass(job, CorrelationMapper.class);
        CorrelationMultithreadedMapper.setNumberOfThreads(job, threads);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(CorrelationWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    job.setReducerClass(CorrelationReducer.class);
    // one reducer per ~50 columns (e.g. 3000 features -> 60 reducers, 600 -> 12); using many reducers avoids
    // all map output being copied to a single reducer: with 3000+ features each mapper emits ~700MB, so 400 mappers produce ~280GB
    job.setNumReduceTasks(this.columnConfigList.size() < 50 ? 2 : this.columnConfigList.size() / 50);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    String corrPath = super.getPathFinder().getCorrelationPath(source);
    FileOutputFormat.setOutputPath(job, new Path(corrPath));
    // clean the output path first
    ShifuFileUtils.deleteFile(corrPath, source);
    // submit job
    if (job.waitForCompletion(true)) {
        dumpAndCalculateCorrelationResult(source, corrPath);
    } else {
        throw new RuntimeException("MapReduce Correlation Computing Job failed.");
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
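
Note that the GenericOptionsParser constructed above is used purely for its side effect: the constructor parses -libjars and -files into the Configuration it is handed, and the parser object itself is discarded. A minimal sketch demonstrating that side effect (the property key my.custom.key is made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParserSideEffectDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // the constructor mutates conf in place; the returned parser can be ignored
        new GenericOptionsParser(conf, new String[] { "-D", "my.custom.key=42" });
        System.out.println(conf.get("my.custom.key")); // prints 42
        // -files and -libjars values are validated and recorded similarly
        // (under the tmpfiles/tmpjars keys) for the job submitter to pick up
    }
}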

Example 28 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class VarSelectModelProcessor method prepareSEJobConf.

private void prepareSEJobConf(SourceType source, final Configuration conf) throws IOException {
    String modelConfigPath = ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString();
    String columnConfigPath = ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString();
    // only the first model is used for sensitivity analysis
    String seModelPath = ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelsPath(), "model0." + modelConfig.getAlgorithm().toLowerCase())).toString();
    String filePath = modelConfigPath + "," + columnConfigPath + "," + seModelPath;
    // add jars and files to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", filePath });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // raise mapreduce.job.max.split.locations (to 5000 here) to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    // temporarily set to false because some clusters use gzip by default, and CombineInputFormat would
    // incorrectly split gzip files (a bug)
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, false);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.9"));
    conf.set(Constants.SHIFU_VARSELECT_FILTEROUT_TYPE, modelConfig.getVarSelectFilterBy());
    Float filterOutRatio = this.modelConfig.getVarSelect().getFilterOutRatio();
    if (filterOutRatio == null) {
        log.warn("filterOutRatio in var select is not set. Using default value 0.05.");
        filterOutRatio = 0.05f;
    }
    if (filterOutRatio.compareTo(Float.valueOf(1.0f)) >= 0) {
        throw new IllegalArgumentException("WrapperRatio should be in (0, 1).");
    }
    conf.setFloat(Constants.SHIFU_VARSELECT_FILTEROUT_RATIO, filterOutRatio);
    conf.setInt(Constants.SHIFU_VARSELECT_FILTER_NUM, this.modelConfig.getVarSelectFilterNum());
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    // no matter how mapreduce.task.io.sort.mb is set elsewhere, the sensitivity job has only one reducer and
    // each mapper outputs only column stats, so a 150MB sort buffer is enough
    conf.setInt("mapreduce.task.io.sort.mb", 150);
}
Also used : Path (org.apache.hadoop.fs.Path), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
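
The -files option above ships the ModelConfig, ColumnConfig and model file to every task; at runtime Hadoop symlinks each distributed file into the task's working directory under its base name. A hypothetical sketch of the consuming side, assuming a shipped file named model0.nn (following the "model0." + algorithm naming above; this is not Shifu's actual mapper):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SensitivityMapperSketch extends Mapper<LongWritable, Text, Text, Text> {

    private byte[] modelBytes;

    @Override
    protected void setup(Context context) throws IOException {
        // "model0.nn" is the base name of the file passed via -files;
        // Hadoop has already localized it into the task working directory
        modelBytes = Files.readAllBytes(Paths.get("model0.nn"));
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // ... score the record against the loaded model ...
    }
}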

Example 29 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class MapReduceShuffle method run.

public void run(String rawNormPath) throws IOException, ClassNotFoundException, InterruptedException {
    RawSourceData.SourceType source = this.modelConfig.getDataSet().getSource();
    final Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 100);
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    int shuffleSize = getDataShuffleSize(rawNormPath, source);
    log.info("Try to shuffle data into - {} parts.", shuffleSize);
    conf.set(Constants.SHIFU_NORM_SHUFFLE_SIZE, Integer.toString(shuffleSize));
    Job job = Job.getInstance(conf, "Shifu: Shuffling normalized data - " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(DataShuffle.ShuffleMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(DataShuffle.KvalPartitioner.class);
    job.setReducerClass(DataShuffle.ShuffleReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(shuffleSize);
    FileInputFormat.setInputPaths(job, rawNormPath);
    FileOutputFormat.setOutputPath(job, new Path(this.pathFinder.getShuffleDataPath()));
    // clean the output path first
    ShifuFileUtils.deleteFile(this.pathFinder.getShuffleDataPath(), source);
    // submit job
    if (job.waitForCompletion(true)) {
        // move the Pig header and schema files first to make sure both end up in the final output
        if (ShifuFileUtils.isFileExists(new Path(rawNormPath, ".pig_header"), source)) {
            ShifuFileUtils.moveTo(new Path(rawNormPath, ".pig_header").toString(), this.pathFinder.getShuffleDataPath(), source);
        }
        if (ShifuFileUtils.isFileExists(new Path(rawNormPath, ".pig_schema"), source)) {
            ShifuFileUtils.moveTo(new Path(rawNormPath, ".pig_schema").toString(), this.pathFinder.getShuffleDataPath(), source);
        }
        ShifuFileUtils.deleteFile(rawNormPath, source);
        ShifuFileUtils.move(this.pathFinder.getShuffleDataPath(), rawNormPath, source);
    } else {
        throw new RuntimeException("MapReduce Shuffle Computing Job Failed.");
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ValueVisitor (ml.shifu.shifu.util.ValueVisitor), RawSourceData (ml.shifu.shifu.container.obj.RawSourceData), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
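
The shuffle job relies on DataShuffle.KvalPartitioner to spread IntWritable keys across the shuffleSize reducers so that the output lands in exactly shuffleSize parts. Shifu's partitioner is not reproduced here; the following is a hypothetical sketch of a modulo partitioner in the same spirit:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class ModuloPartitioner extends Partitioner<IntWritable, Text> {
    @Override
    public int getPartition(IntWritable key, Text value, int numPartitions) {
        // the double-modulo form stays non-negative even for negative keys
        return ((key.get() % numPartitions) + numPartitions) % numPartitions;
    }
}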

Example 30 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class InitModelProcessor method getCountInfoByMRJob.

private Map<Integer, Data> getCountInfoByMRJob() throws IOException, InterruptedException, ClassNotFoundException {
    SourceType source = this.modelConfig.getDataSet().getSource();
    final Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.9"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    @SuppressWarnings("deprecation") Job job = new Job(conf, "Shifu: Column Type Auto Checking Job : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(AutoTypeDistinctCountMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(CountAndFrequentItemsWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    job.setReducerClass(AutoTypeDistinctCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    String autoTypePath = super.getPathFinder().getAutoTypeFilePath(source);
    FileOutputFormat.setOutputPath(job, new Path(autoTypePath));
    // clean the output path first
    ShifuFileUtils.deleteFile(autoTypePath, source);
    // submit job
    if (job.waitForCompletion(true)) {
        long totalValidCount = job.getCounters().findCounter(Constants.SHIFU_GROUP_COUNTER, "TOTAL_VALID_COUNT").getValue();
        long invalidTagCount = job.getCounters().findCounter(Constants.SHIFU_GROUP_COUNTER, "INVALID_TAG").getValue();
        long filterOut = job.getCounters().findCounter(Constants.SHIFU_GROUP_COUNTER, "FILTER_OUT_COUNT").getValue();
        log.info("Total valid records {}, invalid tag records {}, filter out records {}", totalValidCount, invalidTagCount, filterOut);
        if (totalValidCount > 0L && invalidTagCount * 1d / totalValidCount >= 0.8d) {
            log.error("Too many invalid tags, please check you configuration on positive tags and negative tags.");
        }
        return getCountInfoMap(source, autoTypePath);
    } else {
        throw new RuntimeException("MapReduce Job Auto Type Distinct Count failed.");
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ValueVisitor (ml.shifu.shifu.util.ValueVisitor), SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
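
The driver above reads aggregated counters through job.getCounters() once the job finishes; the values themselves are produced inside the tasks. A minimal hypothetical sketch of a mapper incrementing such counters (the group and counter names are illustrative stand-ins for Constants.SHIFU_GROUP_COUNTER and its counter names):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CountingMapperSketch extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (value.getLength() == 0) {
            // each increment is aggregated across all tasks by the framework
            context.getCounter("SHIFU_GROUP", "INVALID_TAG").increment(1L);
            return;
        }
        context.getCounter("SHIFU_GROUP", "TOTAL_VALID_COUNT").increment(1L);
        // ... normal processing ...
    }
}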

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 102
Configuration (org.apache.hadoop.conf.Configuration): 72
Path (org.apache.hadoop.fs.Path): 38
Job (org.apache.hadoop.mapreduce.Job): 35
CommandLine (org.apache.commons.cli.CommandLine): 18
IOException (java.io.IOException): 15
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 11
PosixParser (org.apache.commons.cli.PosixParser): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 10
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 9
ParseException (org.apache.commons.cli.ParseException): 7
Test (org.junit.jupiter.api.Test): 7
ArrayList (java.util.ArrayList): 6
Options (org.apache.commons.cli.Options): 6
JobConf (org.apache.hadoop.mapred.JobConf): 6
File (java.io.File): 5
HashMap (java.util.HashMap): 5
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler): 5
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 5