
Example 91 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project Gaffer by gchq.

the class SampleDataForSplitPointsHandler method generateSplitsFromSampleData.

private void generateSplitsFromSampleData(final SampleDataForSplitPoints operation, final AccumuloStore store) throws OperationException {
    try {
        if (store.getTabletServers().size() < 2) {
            LOGGER.warn("There is only 1 tablet server so no split points will be calculated.");
            return;
        }
    } catch (final StoreException e) {
        throw new OperationException(e.getMessage(), e);
    }
    try {
        /* Parse any Hadoop arguments passed on the command line and use these to configure the Tool */
        final Configuration configuration = new GenericOptionsParser(operation.getCommandLineArgs()).getConfiguration();
        final SampleDataAndCreateSplitsFileTool sampleTool = new SampleDataAndCreateSplitsFileTool(new AccumuloSampleDataForSplitPointsJobFactory(configuration), operation, store);
        ToolRunner.run(sampleTool, operation.getCommandLineArgs());
    } catch (final Exception e) {
        throw new OperationException(e.getMessage(), e);
    }
    LOGGER.info("Finished calculating splits");
}
Also used : AccumuloSampleDataForSplitPointsJobFactory(uk.gov.gchq.gaffer.accumulostore.operation.hdfs.handler.job.factory.AccumuloSampleDataForSplitPointsJobFactory) Configuration(org.apache.hadoop.conf.Configuration) SampleDataAndCreateSplitsFileTool(uk.gov.gchq.gaffer.hdfs.operation.handler.job.tool.SampleDataAndCreateSplitsFileTool) OperationException(uk.gov.gchq.gaffer.operation.OperationException) StoreException(uk.gov.gchq.gaffer.store.StoreException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
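
The single-argument constructor used above builds a fresh Configuration from whatever generic Hadoop options appear in the argument array. A minimal standalone sketch of that behaviour, with hypothetical arguments (demoArgs) standing in for operation.getCommandLineArgs():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptionsDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical arguments standing in for operation.getCommandLineArgs().
        String[] demoArgs = { "-D", "mapreduce.job.reduces=4", "input", "output" };
        // The parser consumes the generic Hadoop options (-D, -conf, -fs, -jt,
        // -libjars, -files, -archives) and applies them to a Configuration.
        GenericOptionsParser parser = new GenericOptionsParser(demoArgs);
        Configuration conf = parser.getConfiguration();
        System.out.println(conf.get("mapreduce.job.reduces")); // prints 4
        // Anything the parser did not recognise is left for the application.
        for (String remaining : parser.getRemainingArgs()) {
            System.out.println("application arg: " + remaining); // input, output
        }
    }
}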

Example 92 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project Gaffer by gchq.

the class AddElementsFromHdfsHandler method fetchElements.

private void fetchElements(final AddElementsFromHdfs operation, final HBaseStore store) throws OperationException {
    try {
        /* Parse any Hadoop arguments passed on the command line and use these to configure the Tool */
        final Configuration configuration = new GenericOptionsParser(store.getConfiguration(), operation.getCommandLineArgs()).getConfiguration();
        final AddElementsFromHdfsTool fetchTool = new AddElementsFromHdfsTool(new HBaseAddElementsFromHdfsJobFactory(configuration), operation, store);
        LOGGER.info("Running FetchElementsFromHdfsTool job");
        ToolRunner.run(fetchTool, operation.getCommandLineArgs());
        LOGGER.info("Finished running FetchElementsFromHdfsTool job");
    } catch (final Exception e) {
        LOGGER.error("Failed to fetch elements from HDFS: {}", e.getMessage());
        throw new OperationException("Failed to fetch elements from HDFS", e);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) AddElementsFromHdfsTool(uk.gov.gchq.gaffer.hdfs.operation.handler.job.tool.AddElementsFromHdfsTool) HBaseAddElementsFromHdfsJobFactory(uk.gov.gchq.gaffer.hbasestore.operation.hdfs.handler.job.factory.HBaseAddElementsFromHdfsJobFactory) IOException(java.io.IOException) OperationException(uk.gov.gchq.gaffer.operation.OperationException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
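
Unlike Example 91, this handler seeds the parser with the store's existing Configuration, so command-line -D options are layered on top of the store's defaults. A small sketch of that layering, using a hypothetical property and hypothetical host names:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ConfigurationOverrideDemo {
    public static void main(String[] args) throws Exception {
        Configuration base = new Configuration();
        base.set("hbase.zookeeper.quorum", "zk1.example.com"); // hypothetical base value
        // -D options parsed from the args override or extend the base settings,
        // which is why the handler passes store.getConfiguration() in first.
        String[] demoArgs = { "-D", "hbase.zookeeper.quorum=zk2.example.com" };
        Configuration merged = new GenericOptionsParser(base, demoArgs).getConfiguration();
        System.out.println(merged.get("hbase.zookeeper.quorum")); // zk2.example.com
    }
}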

Example 93 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class PostTrainModelProcessor method runMRBinAvgScoreJob.

private void runMRBinAvgScoreJob(SourceType source, String postTrainOutputPath) throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    // Register extra jars via -libjars so they are shipped to the mappers and
    // reducers; the parser is used only for its side effect of updating conf.
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING, Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // raise mapreduce.job.max.split.locations (to 5000) to suppress split-location warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files should be added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // guagua settings can be supplied via shifuconfig; inject them into the job configuration
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    @SuppressWarnings("deprecation") Job job = new Job(conf, "Shifu: Post Train : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(PostTrainMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FeatureStatsWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    MultipleOutputs.addNamedOutput(job, Constants.POST_TRAIN_OUTPUT_SCORE, TextOutputFormat.class, NullWritable.class, Text.class);
    job.setReducerClass(PostTrainReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(postTrainOutputPath));
    // clean the output directory first
    ShifuFileUtils.deleteFile(postTrainOutputPath, source);
    // submit job
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Post train Bin Avg Score MapReduce job is failed.");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
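
The discarded parser near the top of this method works entirely through side effects on conf: each -libjars entry is validated (the file must exist) and recorded in the configuration for the job client to ship to the cluster. A sketch of just that mechanism, using a temporary stand-in jar so the existence check passes; the "tmpjars" key is where Hadoop's MapReduce client records the list:

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class LibJarsDemo {
    public static void main(String[] args) throws Exception {
        // -libjars validates that each file exists, so create a stand-in jar.
        File jar = File.createTempFile("dep", ".jar");
        jar.deleteOnExit();
        Configuration conf = new Configuration();
        // The parser mutates conf in place; the parser object itself can be discarded.
        new GenericOptionsParser(conf, new String[] { "-libjars", jar.getAbsolutePath() });
        // The jar list is recorded under "tmpjars" for shipping to the cluster.
        System.out.println(conf.get("tmpjars"));
    }
}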

Example 94 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.

the class MapReducerStatsWorker method prepareJobConf.

private void prepareJobConf(RawSourceData.SourceType source, final Configuration conf, String filePath) throws IOException {
    // Register extra jars via -libjars and ship filePath to the tasks via -files;
    // the parser is used only for its side effect of updating conf.
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", filePath });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING, Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(this.pathFinder.getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(this.pathFinder.getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // raise mapreduce.job.max.split.locations (to 5000) to suppress split-location warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    conf.set(Constants.SHIFU_STATS_FILTER_EXPRESSIONS, super.modelConfig.getSegmentFilterExpressionsAsString());
    log.info("segment expressions is {}", super.modelConfig.getSegmentFilterExpressionsAsString());
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files should be added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // guagua settings can be supplied via shifuconfig; inject them into the job configuration
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
}
Also used : Path(org.apache.hadoop.fs.Path) ValueVisitor(ml.shifu.shifu.util.ValueVisitor) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
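
The -files option used here works the same way through side effects: the path is validated and recorded, and at run time each task can open the file by its bare name from the working directory. A sketch with a hypothetical stand-in file (created on the fly, since the parser checks that it exists); "tmpfiles" is the configuration key Hadoop's client uses for the list:

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class FilesOptionDemo {
    public static void main(String[] args) throws Exception {
        // Stand-in for the filePath argument passed into prepareJobConf.
        File sideFile = File.createTempFile("column-config", ".json");
        sideFile.deleteOnExit();
        Configuration conf = new Configuration();
        new GenericOptionsParser(conf, new String[] { "-files", sideFile.getAbsolutePath() });
        // -files entries are recorded under "tmpfiles"; at run time each task
        // can read the file by its bare name from its working directory.
        System.out.println(conf.get("tmpfiles"));
    }
}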

Example 95 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project hbase by apache.

the class ChaosService method main.

public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    new GenericOptionsParser(conf, args);
    ChoreService choreChaosService = null;
    ScheduledChore authChore = AuthUtil.getAuthChore(conf);
    try {
        if (authChore != null) {
            choreChaosService = new ChoreService(ChaosConstants.CHORE_SERVICE_PREFIX);
            choreChaosService.scheduleChore(authChore);
        }
        execute(args, conf);
    } finally {
        if (authChore != null) {
            choreChaosService.shutdown();
        }
    }
}
Also used : ChoreService(org.apache.hadoop.hbase.ChoreService) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ScheduledChore(org.apache.hadoop.hbase.ScheduledChore) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
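
ChaosService keeps the full args array and passes it on to execute unchanged. An alternative pattern, sketched below with a hypothetical property name and service argument, is to take only the leftover, non-generic arguments from the parser itself:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ServiceMainDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical invocation: java ServiceMainDemo -D demo.flag=true start
        Configuration conf = new Configuration(); // HBaseConfiguration.create() in ChaosService
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // Generic options (-D, -conf, ...) are now folded into conf; only the
        // service-specific arguments (e.g. "start") remain.
        String[] serviceArgs = parser.getRemainingArgs();
        System.out.println(conf.get("demo.flag", "<unset>"));
        System.out.println(String.join(" ", serviceArgs));
    }
}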

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 102
Configuration (org.apache.hadoop.conf.Configuration): 72
Path (org.apache.hadoop.fs.Path): 38
Job (org.apache.hadoop.mapreduce.Job): 35
CommandLine (org.apache.commons.cli.CommandLine): 18
IOException (java.io.IOException): 15
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 11
PosixParser (org.apache.commons.cli.PosixParser): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 10
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 9
ParseException (org.apache.commons.cli.ParseException): 7
Test (org.junit.jupiter.api.Test): 7
ArrayList (java.util.ArrayList): 6
Options (org.apache.commons.cli.Options): 6
JobConf (org.apache.hadoop.mapred.JobConf): 6
File (java.io.File): 5
HashMap (java.util.HashMap): 5
YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler): 5
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 5