use of org.apache.hadoop.util.GenericOptionsParser in project tez by apache.
the class FilterLinesByWord method run.
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Credentials credentials = new Credentials();
  boolean generateSplitsInClient = false;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }
  if (otherArgs.length != 3) {
    printUsage();
    return 2;
  }
  String inputPath = otherArgs[0];
  String outputPath = otherArgs[1];
  String filterWord = otherArgs[2];
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(new Path(outputPath))) {
    System.err.println("Output directory " + outputPath + " already exists");
    return 2;
  }
  TezConfiguration tezConf = new TezConfiguration(conf);
  Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
  String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
  if (jarPath == null) {
    throw new TezUncheckedException("Could not find any jar containing " + FilterLinesByWord.class.getName() + " in the classpath");
  }
  Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
  fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
  FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
  TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);
  Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
  LocalResource dagJarLocalRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
      remoteJarStatus.getModificationTime());
  commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
  TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
  // The session must be started before DAGs can be submitted to it.
  tezSession.start();
  Configuration stage1Conf = new JobConf(conf);
  stage1Conf.set(FILTER_PARAM_NAME, filterWord);
  Configuration stage2Conf = new JobConf(conf);
  stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
  stage2Conf.setBoolean("mapred.mapper.new-api", false);
  UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
  // Setup stage1 Vertex
  Vertex stage1Vertex = Vertex.create("stage1",
      ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName())
          .setUserPayload(stage1Payload))
      .addTaskLocalFiles(commonLocalResources);
  DataSourceDescriptor dsd;
  if (generateSplitsInClient) {
    // TODO TEZ-1406. Don't use MRInputLegacy
    stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
    stage1Conf.setBoolean("mapred.mapper.new-api", false);
    dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
  } else {
    dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
        .groupSplits(false).build();
  }
  stage1Vertex.addDataSource("MRInput", dsd);
  // Setup stage2 Vertex
  Vertex stage2Vertex = Vertex.create("stage2",
      ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1);
  stage2Vertex.addTaskLocalFiles(commonLocalResources);
  // Configure the Output for stage2
  OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
      .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
  OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
  stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));
  UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
      .newBuilder(Text.class.getName(), TextLongPair.class.getName())
      .setFromConfiguration(tezConf).build();
  DAG dag = DAG.create("FilterLinesByWord");
  Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
  dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
LOG.info("Submitting DAG to Tez Session");
DAGClient dagClient = tezSession.submitDAG(dag);
LOG.info("Submitted DAG to Tez Session");
DAGStatus dagStatus = null;
String[] vNames = { "stage1", "stage2" };
try {
while (true) {
dagStatus = dagClient.getDAGStatus(null);
if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
break;
}
try {
Thread.sleep(500);
} catch (InterruptedException e) {
// continue;
}
}
while (dagStatus.getState() == DAGStatus.State.RUNNING) {
try {
ExampleDriver.printDAGStatus(dagClient, vNames);
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// continue;
}
dagStatus = dagClient.getDAGStatus(null);
} catch (TezException e) {
LOG.error("Failed to get application progress. Exiting");
return -1;
}
}
dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
} finally {
fs.delete(stagingDir, true);
tezSession.stop();
}
ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
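The printUsage() helper called above is not part of this excerpt. A minimal sketch, assuming the usage text and the -generateSplitsInClient flag handled by SplitsInClientOptionParser (both are assumptions, not confirmed by this snippet):

private static void printUsage() {
  // Hypothetical usage text; the actual Tez example may word this differently.
  System.err.println("Usage: filterLinesByWord <in> <out> <filter_word> [-generateSplitsInClient true/<false>]");
  ToolRunner.printGenericCommandUsage(System.err);
}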
use of org.apache.hadoop.util.GenericOptionsParser in project incubator-rya by apache.
the class ToolConfigUtils method getUserArguments.
/**
 * Gets the set of user arguments from the user's config and/or their extra supplied
 * command line arguments. This weeds out all the automatically generated parameters created
 * from initializing a {@link Configuration} object and should only give back a set of arguments
 * provided directly by the user.
 * @param conf the {@link Configuration} provided.
 * @param args the extra arguments from the command line.
 * @return a {@link Set} of argument strings.
 * @throws IOException
 */
public static Set<String> getUserArguments(final Configuration conf, final String[] args) throws IOException {
  String[] filteredArgs = new String[] {};
  if (Arrays.asList(args).contains("-conf")) {
    // Parse the args, which applies any "-conf" resources to the Configuration.
    new GenericOptionsParser(conf, args);
    final List<String> commandLineArgs = new ArrayList<>();
    for (final String arg : args) {
      if (arg.startsWith("-D")) {
        commandLineArgs.add(arg);
      }
    }
    filteredArgs = commandLineArgs.toArray(new String[0]);
  } else {
    filteredArgs = args;
  }
  // Get the supplied config name from the resource string.
  // There is no easy way of getting the name, so pull it off the list of
  // resource names in the Configuration's toString() output, where it
  // should be the last one.
  final String confString = conf.toString();
  final String resourceString = StringUtils.removeStart(confString, "Configuration: ");
  final List<String> resourceNames = Arrays.asList(StringUtils.split(resourceString, ", "));
  final String configFilename = resourceNames.get(resourceNames.size() - 1);
  final Set<String> toolArgsSet = new HashSet<>();
  final File file = new File(configFilename);
  // The file may not exist if no config was supplied.
  if (file.exists()) {
    XMLConfiguration configuration = null;
    try {
      configuration = new XMLConfiguration(configFilename);
      toolArgsSet.addAll(getConfigArguments(configuration));
    } catch (final ConfigurationException e) {
      log.error("Unable to load configuration file.", e);
    }
  }
  toolArgsSet.addAll(Arrays.asList(filteredArgs));
  return Collections.unmodifiableSet(toolArgsSet);
}
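A brief usage sketch (the file path and property name below are illustrative). Note that when "-conf" is present, only tokens starting with "-D" survive the filter, so fused forms like -Dkey=value are kept while other command-line tokens are dropped:

Configuration conf = new Configuration();
String[] args = { "-conf", "/etc/rya/rya-job.xml", "-Dmapreduce.job.queuename=rya", "someArg" };
Set<String> userArgs = ToolConfigUtils.getUserArguments(conf, args);
// userArgs contains "-Dmapreduce.job.queuename=rya" plus any arguments declared
// in rya-job.xml; "someArg" is dropped in this branch, and the Configuration's
// auto-generated defaults never appear in the result.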
use of org.apache.hadoop.util.GenericOptionsParser in project cdap by caskdata.
the class WordCount method main.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    System.exit(2);
  }
  String inputPath = otherArgs[0];
  String outputPath = otherArgs[1];
  Job job = Job.getInstance(conf, "word count");
  configureJob(job, inputPath, outputPath);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
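configureJob is defined elsewhere in the CDAP example. A minimal sketch of what such a method typically sets up (the mapper and reducer class names here are hypothetical, not CDAP's actual ones):

private static void configureJob(Job job, String inputPath, String outputPath) throws IOException {
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class); // hypothetical mapper
  job.setCombinerClass(IntSumReducer.class); // hypothetical combiner/reducer
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
}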
use of org.apache.hadoop.util.GenericOptionsParser in project systemml by apache.
the class DMLAppMaster method runApplicationMaster.
public void runApplicationMaster(String[] args) throws YarnException, IOException {
  _conf = new YarnConfiguration();
  // obtain application ID
  String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
  ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
  _appId = containerId.getApplicationAttemptId().getApplicationId();
  LOG.info("SystemML application master (applicationID: " + _appId + ")");
  // initialize clients to ResourceManager
  AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
  rmClient.init(_conf);
  rmClient.start();
  // register with ResourceManager (host and port for RM communication)
  rmClient.registerApplicationMaster("", 0, "");
  LOG.debug("Registered the SystemML application master with resource manager");
  // start status reporter to ResourceManager
  DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
  reporter.start();
  LOG.debug("Started status reporter (heartbeat to resource manager)");
  // set DMLScript app master context
  DMLScript.setActiveAM();
  // parse input arguments
  String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();
  // run SystemML CP
  FinalApplicationStatus status = null;
  try {
    // core DML script execution (equivalent to non-AM runtime)
    boolean success = DMLScript.executeScript(_conf, otherArgs);
    if (success)
      status = FinalApplicationStatus.SUCCEEDED;
    else
      status = FinalApplicationStatus.FAILED;
  } catch (DMLScriptException ex) {
    LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to execute DML script due to stop call:\n\t" + ex.getMessage());
    status = FinalApplicationStatus.FAILED;
    writeMessageToHDFSWorkingDir(ex.getMessage());
  } catch (Exception ex) {
    LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to execute DML script.", ex);
    status = FinalApplicationStatus.FAILED;
  } finally {
    // stop periodic status reports
    reporter.stopStatusReporter();
    LOG.debug("Stopped status reporter");
    // unregister resource manager client
    rmClient.unregisterApplicationMaster(status, "", "");
    LOG.debug("Unregistered the SystemML application master");
  }
}
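To illustrate the parse step above, assuming SystemML's conventional -f and -exec script options (an assumption; this excerpt does not show them): if the AM receives

// args: { "-Dmapreduce.job.queuename=prod", "-f", "algorithm.dml", "-exec", "hybrid" }

then GenericOptionsParser applies the -D override to _conf, and getRemainingArgs() returns { "-f", "algorithm.dml", "-exec", "hybrid" }, which is what DMLScript.executeScript consumes.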
use of org.apache.hadoop.util.GenericOptionsParser in project systemml by apache.
the class DMLScript method main.
/**
 * @param args command-line arguments
 * @throws IOException if an IOException occurs
 */
public static void main(String[] args) throws IOException {
  Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  try {
    DMLScript.executeScript(conf, otherArgs);
  } catch (ParseException pe) {
    System.err.println(pe.getMessage());
  } catch (DMLScriptException e) {
    // In case of DMLScriptException, simply print the error message.
    System.err.println(e.getMessage());
  }
}
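All five snippets share the same pattern: construct a GenericOptionsParser against a Configuration, let it consume the generic Hadoop options, and treat getRemainingArgs() as the application's own arguments. A minimal, self-contained sketch (the class name and printed property are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParserDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Consumes generic options such as -Dkey=value, -conf <file>, -fs <uri>,
    // -jt <uri> and applies them to conf; everything else is left over.
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    for (String arg : remaining) {
      System.out.println("app arg: " + arg);
    }
    System.out.println("queue = " + conf.get("mapreduce.job.queuename"));
  }
}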