Use of org.apache.hadoop.util.GenericOptionsParser in project Gaffer by gchq.
From the class SampleDataForSplitPointsHandler, method generateSplitsFromSampleData:
private void generateSplitsFromSampleData(final SampleDataForSplitPoints operation, final AccumuloStore store) throws OperationException {
    try {
        if (store.getTabletServers().size() < 2) {
            LOGGER.warn("There is only 1 tablet server so no split points will be calculated.");
            return;
        }
    } catch (final StoreException e) {
        throw new OperationException(e.getMessage(), e);
    }
    try {
        /* Parse any Hadoop arguments passed on the command line and use these to configure the Tool */
        final Configuration configuration = new GenericOptionsParser(operation.getCommandLineArgs()).getConfiguration();
        final SampleDataAndCreateSplitsFileTool sampleTool = new SampleDataAndCreateSplitsFileTool(new AccumuloSampleDataForSplitPointsJobFactory(configuration), operation, store);
        ToolRunner.run(sampleTool, operation.getCommandLineArgs());
    } catch (final Exception e) {
        throw new OperationException(e.getMessage(), e);
    }
    LOGGER.info("Finished calculating splits");
}
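For context, the single-argument constructor used above builds a brand-new Configuration from the standard Hadoop generic options (-D, -conf, -fs, -libjars, -files, -archives) and strips them from the argument list. A minimal sketch of that pattern, assuming a standalone Hadoop client; the class name and argument values are illustrative, not taken from Gaffer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptionsDemo {
    public static void main(String[] args) throws Exception {
        // e.g. args = { "-D", "mapreduce.job.queuename=etl", "/input", "/output" }
        GenericOptionsParser parser = new GenericOptionsParser(args);
        // Configuration populated from the generic options
        Configuration conf = parser.getConfiguration();
        // application arguments with the generic options removed
        String[] appArgs = parser.getRemainingArgs();
        System.out.println(conf.get("mapreduce.job.queuename"));
        System.out.println(String.join(" ", appArgs));
    }
}

The Gaffer handler then hands the same command-line arguments to ToolRunner.run, which performs an equivalent parse for the Tool it launches.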
Use of org.apache.hadoop.util.GenericOptionsParser in project Gaffer by gchq.
From the class AddElementsFromHdfsHandler, method fetchElements:
private void fetchElements(final AddElementsFromHdfs operation, final HBaseStore store) throws OperationException {
    try {
        /* Parse any Hadoop arguments passed on the command line and use these to configure the Tool */
        final Configuration configuration = new GenericOptionsParser(store.getConfiguration(), operation.getCommandLineArgs()).getConfiguration();
        final AddElementsFromHdfsTool fetchTool = new AddElementsFromHdfsTool(new HBaseAddElementsFromHdfsJobFactory(configuration), operation, store);
        LOGGER.info("Running FetchElementsFromHdfsTool job");
        ToolRunner.run(fetchTool, operation.getCommandLineArgs());
        LOGGER.info("Finished running FetchElementsFromHdfsTool job");
    } catch (final Exception e) {
        LOGGER.error("Failed to fetch elements from HDFS: {}", e.getMessage());
        throw new OperationException("Failed to fetch elements from HDFS", e);
    }
}
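Unlike the Accumulo handler above, this one uses the two-argument constructor and seeds the parser with store.getConfiguration(), so the HBase connection settings already on the store are kept and any options supplied on the command line are layered on top of them rather than replacing them.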
Use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.
From the class PostTrainModelProcessor, method runMRBinAvgScoreJob:
private void runMRBinAvgScoreJob(SourceType source, String postTrainOutputPath) throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING, Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // raise mapreduce.job.max.split.locations to suppress "max block locations exceeded" warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files should be added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Shifu: Post Train : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(PostTrainMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FeatureStatsWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    MultipleOutputs.addNamedOutput(job, Constants.POST_TRAIN_OUTPUT_SCORE, TextOutputFormat.class, NullWritable.class, Text.class);
    job.setReducerClass(PostTrainReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(postTrainOutputPath));
    // clean the output path first
    ShifuFileUtils.deleteFile(postTrainOutputPath, source);
    // submit the job and fail if it does not complete successfully
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Post train Bin Avg Score MapReduce job failed.");
    }
}
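Note that the GenericOptionsParser instance itself is discarded here: the two-argument constructor parses the synthetic "-libjars" argument and writes the result straight into the conf object that was passed in, which is what ships the runtime jars with the MapReduce job. A minimal sketch of that side-effect pattern; the temp file stands in for a real dependency jar and is not part of shifu:

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class LibjarsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // a placeholder jar on the local filesystem (GenericOptionsParser checks the file exists)
        File jar = File.createTempFile("example-dependency", ".jar");
        // the constructor mutates conf in place; the parser object is not needed afterwards
        new GenericOptionsParser(conf, new String[] { "-libjars", jar.getAbsolutePath() });
        // the jar list typically ends up in the internal "tmpjars" property as a qualified URI
        System.out.println(conf.get("tmpjars"));
    }
}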
Use of org.apache.hadoop.util.GenericOptionsParser in project shifu by ShifuML.
From the class MapReducerStatsWorker, method prepareJobConf:
private void prepareJobConf(RawSourceData.SourceType source, final Configuration conf, String filePath) throws IOException {
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", filePath });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING, Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(this.pathFinder.getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source).makeQualified(new Path(this.pathFinder.getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // raise mapreduce.job.max.split.locations to suppress "max block locations exceeded" warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps", Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    conf.set(Constants.SHIFU_STATS_FILTER_EXPRESSIONS, super.modelConfig.getSegmentFilterExpressionsAsString());
    log.info("segment expressions are {}", super.modelConfig.getSegmentFilterExpressionsAsString());
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for HDP 2.2.4, hdp.version should be set and the configuration files should be added to the container classpath
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
}
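The main difference from runMRBinAvgScoreJob above is the extra "-files" pair: GenericOptionsParser records filePath in the job configuration so that Hadoop distributes it through the distributed cache, making it available to every map and reduce task. Because the parser writes everything onto the conf it was given, prepareJobConf can remain a void method that simply mutates the configuration passed in by its caller.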
Use of org.apache.hadoop.util.GenericOptionsParser in project hbase by apache.
From the class ChaosService, method main:
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    new GenericOptionsParser(conf, args);
    ChoreService choreChaosService = null;
    ScheduledChore authChore = AuthUtil.getAuthChore(conf);
    try {
        if (authChore != null) {
            choreChaosService = new ChoreService(ChaosConstants.CHORE_SERVICE_PREFIX);
            choreChaosService.scheduleChore(authChore);
        }
        execute(args, conf);
    } finally {
        if (authChore != null) {
            choreChaosService.shutdown();
        }
    }
}
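As in the shifu examples, the parser's return value is ignored: constructing it with (conf, args) is enough to apply any -D or -conf overrides onto the HBase configuration before AuthUtil.getAuthChore(conf) and execute(args, conf) read from it. When the application-specific arguments are also needed with the generic options stripped, the parser is usually kept long enough to ask for them; a small sketch, with the option values being illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.GenericOptionsParser;

public class HBaseArgsDemo {
    public static void main(String[] args) throws Exception {
        // e.g. args = { "-D", "hbase.client.retries.number=3", "someCommand" }
        Configuration conf = HBaseConfiguration.create();
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        // the generic options have already been applied to conf by the constructor
        String[] commandArgs = parser.getRemainingArgs();
        System.out.println("retries = " + conf.get("hbase.client.retries.number"));
        System.out.println("command args: " + String.join(" ", commandArgs));
    }
}

In the ChaosService code above, the unparsed args array is forwarded to execute unchanged, so any generic options remain visible to the downstream command-line handling.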