Search in sources :

Example 66 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project tez by apache.

the class FilterLinesByWord method run.

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }
    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }
    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }
    TezConfiguration tezConf = new TezConfiguration(conf);
    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath");
    }
    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);
    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
    // Why do I need to start the TezSession.
    tezSession.start();
    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);
    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);
    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)).addTaskLocalFiles(commonLocalResources);
    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Dont' use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);
    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);
    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf).build();
    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");
    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
            // continue;
            }
        }
        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.error("Failed to get application progress. Exiting");
                return -1;
            }
        }
        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Also used : TezException(org.apache.tez.dag.api.TezException) Vertex(org.apache.tez.dag.api.Vertex) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FilterByWordOutputProcessor(org.apache.tez.mapreduce.examples.processor.FilterByWordOutputProcessor) TezClient(org.apache.tez.client.TezClient) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) JobConf(org.apache.hadoop.mapred.JobConf) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) Path(org.apache.hadoop.fs.Path) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Text(org.apache.hadoop.io.Text) DAG(org.apache.tez.dag.api.DAG) TreeMap(java.util.TreeMap) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SplitsInClientOptionParser(org.apache.tez.mapreduce.examples.helpers.SplitsInClientOptionParser) DAGClient(org.apache.tez.dag.api.client.DAGClient) ParseException(org.apache.commons.cli.ParseException) MROutputCommitter(org.apache.tez.mapreduce.committer.MROutputCommitter) Edge(org.apache.tez.dag.api.Edge) Credentials(org.apache.hadoop.security.Credentials) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 67 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project incubator-rya by apache.

the class ToolConfigUtils method getUserArguments.

/**
 * Gets the set of user arguments from the user's config and/or their extra supplied
 * command line arguments.  This weeds out all the automatically generated parameters created
 * from initializing a {@link Configuration} object and should only give back a set of arguments
 * provided directly by the user.
 * @param conf the {@link Configuration} provided.
 * @param args the extra arguments from the command line.
 * @return a {@link Set} of argument strings.
 * @throws IOException
 */
public static Set<String> getUserArguments(final Configuration conf, final String[] args) throws IOException {
    String[] filteredArgs = new String[] {};
    if (Arrays.asList(args).contains("-conf")) {
        // parse args
        new GenericOptionsParser(conf, args);
        final List<String> commandLineArgs = new ArrayList<>();
        for (final String arg : args) {
            if (arg.startsWith("-D")) {
                commandLineArgs.add(arg);
            }
        }
        filteredArgs = commandLineArgs.toArray(new String[0]);
    } else {
        filteredArgs = args;
    }
    // Get the supplied config name from the resource string.
    // No real easy way of getting the name.
    // So, pulling it off the list of resource names in the Configuration's toString() method
    // where it should be the last one.
    final String confString = conf.toString();
    final String resourceString = StringUtils.removeStart(confString, "Configuration: ");
    final List<String> resourceNames = Arrays.asList(StringUtils.split(resourceString, ", "));
    final String configFilename = resourceNames.get(resourceNames.size() - 1);
    final Set<String> toolArgsSet = new HashSet<>();
    final File file = new File(configFilename);
    // should happen if no config is supplied.
    if (file.exists()) {
        XMLConfiguration configuration = null;
        try {
            configuration = new XMLConfiguration(configFilename);
            toolArgsSet.addAll(getConfigArguments(configuration));
        } catch (final ConfigurationException e) {
            log.error("Unable to load configuration file.", e);
        }
    }
    toolArgsSet.addAll(Arrays.asList(filteredArgs));
    return Collections.unmodifiableSet(toolArgsSet);
}
Also used : XMLConfiguration(org.apache.commons.configuration.XMLConfiguration) ConfigurationException(org.apache.commons.configuration.ConfigurationException) ArrayList(java.util.ArrayList) File(java.io.File) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser) HashSet(java.util.HashSet)

Example 68 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project cdap by caskdata.

the class WordCount method main.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    Job job = Job.getInstance(conf, "word count");
    configureJob(job, inputPath, outputPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 69 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project systemml by apache.

the class DMLAppMaster method runApplicationMaster.

public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();
    // obtain application ID
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");
    // initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();
    // register with ResourceManager
    // host, port for rm communication
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");
    // start status reporter to ResourceManager
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");
    // set DMLscript app master context
    DMLScript.setActiveAM();
    // parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();
    // run SystemML CP
    FinalApplicationStatus status = null;
    try {
        // core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);
        if (success)
            status = FinalApplicationStatus.SUCCEEDED;
        else
            status = FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        // stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");
        // unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}
Also used : FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) IOException(java.io.IOException) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 70 with GenericOptionsParser

use of org.apache.hadoop.util.GenericOptionsParser in project systemml by apache.

the class DMLScript method main.

/**
 * @param args command-line arguments
 * @throws IOException if an IOException occurs
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    try {
        DMLScript.executeScript(conf, otherArgs);
    } catch (ParseException pe) {
        System.err.println(pe.getMessage());
    } catch (DMLScriptException e) {
        // In case of DMLScriptException, simply print the error message.
        System.err.println(e.getMessage());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MRJobConfiguration(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) ParseException(org.apache.sysml.parser.ParseException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Aggregations

GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)102 Configuration (org.apache.hadoop.conf.Configuration)72 Path (org.apache.hadoop.fs.Path)38 Job (org.apache.hadoop.mapreduce.Job)35 CommandLine (org.apache.commons.cli.CommandLine)18 IOException (java.io.IOException)15 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)11 PosixParser (org.apache.commons.cli.PosixParser)10 FileSystem (org.apache.hadoop.fs.FileSystem)10 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)10 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)9 ParseException (org.apache.commons.cli.ParseException)7 Test (org.junit.jupiter.api.Test)7 ArrayList (java.util.ArrayList)6 Options (org.apache.commons.cli.Options)6 JobConf (org.apache.hadoop.mapred.JobConf)6 File (java.io.File)5 HashMap (java.util.HashMap)5 YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler)5 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)5