Search in sources :

Example 46 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testOrderedWordCount.

@Test(timeout = 60000)
public void testOrderedWordCount() throws Exception {
    String inputDirStr = "/tmp/owc-input/";
    Path inputDir = new Path(inputDirStr);
    Path stagingDirPath = new Path("/tmp/owc-staging-dir");
    remoteFs.mkdirs(inputDir);
    remoteFs.mkdirs(stagingDirPath);
    generateOrderedWordCountInput(inputDir, remoteFs);
    String outputDirStr = "/tmp/owc-output/";
    Path outputDir = new Path(outputDirStr);
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
    TezClient tezSession = null;
    try {
        OrderedWordCount job = new OrderedWordCount();
        Assert.assertTrue("OrderedWordCount failed", job.run(tezConf, new String[] { "-counter", inputDirStr, outputDirStr, "2" }, null) == 0);
        verifyOutput(outputDir, remoteFs);
    } finally {
        remoteFs.delete(stagingDirPath, true);
        if (tezSession != null) {
            tezSession.stop();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) OrderedWordCount(org.apache.tez.examples.OrderedWordCount) Test(org.junit.Test)

Example 47 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testMultipleCommits_OnDAGSuccess.

@Test(timeout = 60000)
public void testMultipleCommits_OnDAGSuccess() throws Exception {
    Path stagingDirPath = new Path("/tmp/commit-staging-dir");
    Random rand = new Random();
    String v1OutputPathPrefix = "/tmp/commit-output-v1";
    int v1OutputNum = rand.nextInt(10) + 1;
    String v2OutputPathPrefix = "/tmp/commit-output-v2";
    int v2OutputNum = rand.nextInt(10) + 1;
    String uv12OutputPathPrefix = "/tmp/commit-output-uv12";
    int uv12OutputNum = rand.nextInt(10) + 1;
    String v3OutputPathPrefix = "/tmp/commit-output-v3";
    int v3OutputNum = rand.nextInt(10) + 1;
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
    TezClient tezSession = null;
    try {
        MultipleCommitsExample job = new MultipleCommitsExample();
        Assert.assertTrue("MultipleCommitsExample failed", job.run(tezConf, new String[] { v1OutputPathPrefix, v1OutputNum + "", v2OutputPathPrefix, v2OutputNum + "", uv12OutputPathPrefix, uv12OutputNum + "", v3OutputPathPrefix, v3OutputNum + "" }, null) == 0);
        verifyCommits(v1OutputPathPrefix, v1OutputNum);
        verifyCommits(v2OutputPathPrefix, v2OutputNum);
        verifyCommits(uv12OutputPathPrefix, uv12OutputNum);
        verifyCommits(v3OutputPathPrefix, v3OutputNum);
    } finally {
        remoteFs.delete(stagingDirPath, true);
        if (tezSession != null) {
            tezSession.stop();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Random(java.util.Random) MultipleCommitsExample(org.apache.tez.mapreduce.examples.MultipleCommitsExample) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Example 48 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class FilterLinesByWord method run.

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Credentials credentials = new Credentials();
    boolean generateSplitsInClient = false;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }
    if (otherArgs.length != 3) {
        printUsage();
        return 2;
    }
    String inputPath = otherArgs[0];
    String outputPath = otherArgs[1];
    String filterWord = otherArgs[2];
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputPath))) {
        System.err.println("Output directory : " + outputPath + " already exists");
        return 2;
    }
    TezConfiguration tezConf = new TezConfiguration(conf);
    fs.getWorkingDirectory();
    Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
    String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
    if (jarPath == null) {
        throw new TezUncheckedException("Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath");
    }
    Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
    fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
    FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);
    Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
    LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
    commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
    TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
    // Why do I need to start the TezSession.
    tezSession.start();
    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);
    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);
    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)).addTaskLocalFiles(commonLocalResources);
    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
        // TODO TEZ-1406. Dont' use MRInputLegacy
        stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
        stage1Conf.setBoolean("mapred.mapper.new-api", false);
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
        dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false).build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);
    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);
    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));
    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf).build();
    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");
    DAGStatus dagStatus = null;
    String[] vNames = { "stage1", "stage2" };
    try {
        while (true) {
            dagStatus = dagClient.getDAGStatus(null);
            if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
                break;
            }
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
            // continue;
            }
        }
        while (dagStatus.getState() == DAGStatus.State.RUNNING) {
            try {
                ExampleDriver.printDAGStatus(dagClient, vNames);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                // continue;
                }
                dagStatus = dagClient.getDAGStatus(null);
            } catch (TezException e) {
                LOG.error("Failed to get application progress. Exiting");
                return -1;
            }
        }
        dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
    ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Also used : TezException(org.apache.tez.dag.api.TezException) Vertex(org.apache.tez.dag.api.Vertex) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FilterByWordOutputProcessor(org.apache.tez.mapreduce.examples.processor.FilterByWordOutputProcessor) TezClient(org.apache.tez.client.TezClient) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) JobConf(org.apache.hadoop.mapred.JobConf) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) Path(org.apache.hadoop.fs.Path) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Text(org.apache.hadoop.io.Text) DAG(org.apache.tez.dag.api.DAG) TreeMap(java.util.TreeMap) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SplitsInClientOptionParser(org.apache.tez.mapreduce.examples.helpers.SplitsInClientOptionParser) DAGClient(org.apache.tez.dag.api.client.DAGClient) ParseException(org.apache.commons.cli.ParseException) MROutputCommitter(org.apache.tez.mapreduce.committer.MROutputCommitter) Edge(org.apache.tez.dag.api.Edge) Credentials(org.apache.hadoop.security.Credentials) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 49 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTaskErrorsUsingLocalMode method getTezClient.

private TezClient getTezClient(String name) throws IOException, TezException {
    TezConfiguration tezConf1 = new TezConfiguration();
    tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
    tezConf1.set("fs.defaultFS", "file:///");
    tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
    tezConf1.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500);
    TezClient tezClient1 = TezClient.create(name, tezConf1, true);
    tezClient1.start();
    return tezClient1;
}
Also used : TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient)

Example 50 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTaskErrorsUsingLocalMode method testSelfKillReported.

@Test(timeout = 20000)
public void testSelfKillReported() throws IOException, TezException, InterruptedException {
    TezClient tezClient = getTezClient("testSelfKillReported");
    DAGClient dagClient = null;
    try {
        FailingProcessor.configureForKilled(10);
        DAG dag = DAG.create("testSelfKillReported").addVertex(Vertex.create(VERTEX_NAME, ProcessorDescriptor.create(FailingProcessor.class.getName()), 1));
        dagClient = tezClient.submitDAG(dag);
        dagClient.waitForCompletion();
        assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
        assertEquals(10, dagClient.getVertexStatus(VERTEX_NAME, null).getProgress().getKilledTaskAttemptCount());
    } finally {
        if (dagClient != null) {
            dagClient.close();
        }
        tezClient.stop();
    }
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Aggregations

TezClient (org.apache.tez.client.TezClient)58 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)44 Test (org.junit.Test)38 Path (org.apache.hadoop.fs.Path)34 DAG (org.apache.tez.dag.api.DAG)32 DAGClient (org.apache.tez.dag.api.client.DAGClient)29 DAGStatus (org.apache.tez.dag.api.client.DAGStatus)18 Vertex (org.apache.tez.dag.api.Vertex)15 SleepProcessor (org.apache.tez.runtime.library.processor.SleepProcessor)12 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)10 SleepProcessorConfig (org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig)9 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)8 FileSystem (org.apache.hadoop.fs.FileSystem)7 IOException (java.io.IOException)6 Random (java.util.Random)6 TezException (org.apache.tez.dag.api.TezException)6 UserPayload (org.apache.tez.dag.api.UserPayload)6 YarnApplicationState (org.apache.hadoop.yarn.api.records.YarnApplicationState)5 HashMap (java.util.HashMap)4 ExecutionException (java.util.concurrent.ExecutionException)4