Search in sources :

Example 41 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testOrderedWordCountDisableSplitGrouping.

/**
 * Runs {@code OrderedWordCount} with the {@code -counter}, {@code -local} and
 * {@code -disableSplitGrouping} flags against generated input on {@code localFs},
 * then verifies the output directory.
 *
 * <p>The staging directory is created up front and always removed in the
 * {@code finally} block, whether or not the job or the verification succeeds.
 *
 * @throws Exception if file-system setup, the job run, or output verification fails
 */
@Test(timeout = 60000)
public void testOrderedWordCountDisableSplitGrouping() throws Exception {
    String inputDirStr = TEST_ROOT_DIR + "/tmp/owc-input/";
    Path inputDir = new Path(inputDirStr);
    Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/owc-staging-dir");
    localFs.mkdirs(inputDir);
    localFs.mkdirs(stagingDirPath);
    generateOrderedWordCountInput(inputDir, localFs);

    String outputDirStr = TEST_ROOT_DIR + "/tmp/owc-output/";
    Path outputDir = new Path(outputDirStr);
    // Remove the output directory before running (presumably so stale results
    // from an earlier run cannot satisfy verifyOutput -- confirm).
    localFs.delete(outputDir, true);

    TezConfiguration tezConf = new TezConfiguration(conf);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
    try {
        OrderedWordCount job = new OrderedWordCount();
        String[] jobArgs = new String[] { "-counter", "-local", "-disableSplitGrouping", inputDirStr, outputDirStr, "2" };
        // No TezClient is handed to the job (third argument is null), so there is
        // no session for this test to stop afterwards.
        Assert.assertEquals("OrderedWordCount failed", 0, job.run(tezConf, jobArgs, null));
        verifyOutput(outputDir, localFs);
    } finally {
        localFs.delete(stagingDirPath, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) OrderedWordCount(org.apache.tez.examples.OrderedWordCount) Test(org.junit.Test)

Example 42 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testVertexFailuresMaxPercent.

/**
 * Submits a two-vertex DAG ("Parent" -> "Child"), both vertices running
 * {@code FailingAttemptProcessor} with parallelism 2, and asserts that the DAG
 * finishes in state {@code SUCCEEDED}.
 *
 * <p>The client configuration sets {@code TEZ_VERTEX_FAILURES_MAXPERCENT} to
 * {@code "50.0f"} and {@code TEZ_AM_TASK_MAX_FAILED_ATTEMPTS} to 1.
 *
 * @throws TezException         propagated from the Tez client / DAG client calls
 * @throws InterruptedException propagated from waiting on DAG completion
 * @throws IOException          propagated from the Tez client / DAG client calls
 */
@Test(timeout = 60000)
public void testVertexFailuresMaxPercent() throws TezException, InterruptedException, IOException {
    final String testName = "TestVertexFailuresMaxPercent";

    TezConfiguration clientConf = new TezConfiguration(mrrTezCluster.getConfig());
    clientConf.set(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, "50.0f");
    clientConf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);

    TezClient client = TezClient.create(testName, clientConf);
    client.start();
    try {
        DAG failureDag = DAG.create(testName);

        // Both vertices use the same processor class.
        String processorClassName = FailingAttemptProcessor.class.getName();
        Vertex parent = Vertex.create("Parent", ProcessorDescriptor.create(processorClassName), 2);
        Vertex child = Vertex.create("Child", ProcessorDescriptor.create(processorClassName), 2);

        OrderedPartitionedKVEdgeConfig shuffleConfig = OrderedPartitionedKVEdgeConfig
                .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
                .setFromConfiguration(clientConf)
                .build();

        failureDag
                .addVertex(parent)
                .addVertex(child)
                .addEdge(Edge.create(parent, child, shuffleConfig.createDefaultEdgeProperty()));

        DAGClient submitted = client.submitDAG(failureDag);
        submitted.waitForCompletion();

        DAGStatus.State finalState = submitted.getDAGStatus(null).getState();
        Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalState);
    } finally {
        client.stop();
    }
}
Also used : OrderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig) Vertex(org.apache.tez.dag.api.Vertex) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) DAGClient(org.apache.tez.dag.api.client.DAGClient) Text(org.apache.hadoop.io.Text) MultiAttemptDAG(org.apache.tez.test.dag.MultiAttemptDAG) DAG(org.apache.tez.dag.api.DAG) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Example 43 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testPerIOCounterAggregation.

/**
 * Runs a sort-merge join via {@code SortMergeJoinHelper} with
 * {@code TEZ_TASK_GENERATE_COUNTERS_PER_IO} enabled and checks the counters
 * reported for the "joiner" vertex:
 * <ul>
 *   <li>the aggregated {@code TaskCounter} group and the per-input groups for
 *       "input1" and "input2" are non-empty;</li>
 *   <li>for each counter in a fixed list, the aggregated value equals the sum of
 *       the two per-input values, and at least one aggregated value is non-zero;</li>
 *   <li>the aggregated {@code OUTPUT_RECORDS} value is positive and equals the
 *       value in the per-output group for "joinOutput".</li>
 * </ul>
 *
 * <p>The Tez client started here is stopped in a {@code finally} block so the
 * session is released even when an assertion fails.
 *
 * @throws Exception if input generation, the job run, or status retrieval fails
 */
@Test(timeout = 60000)
public void testPerIOCounterAggregation() throws Exception {
    String baseDir = "/tmp/perIOCounterAgg/";
    Path inPath1 = new Path(baseDir + "inPath1");
    Path inPath2 = new Path(baseDir + "inPath2");
    Path outPath = new Path(baseDir + "outPath");
    final Set<String> expectedResults = generateSortMergeJoinInput(inPath1, inPath2);
    Path stagingDirPath = new Path("/tmp/tez-staging-dir");
    remoteFs.mkdirs(stagingDirPath);
    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    conf.setBoolean(TezConfiguration.TEZ_TASK_GENERATE_COUNTERS_PER_IO, true);
    TezClient tezClient = TezClient.create(SortMergeJoinHelper.class.getSimpleName(), conf);
    tezClient.start();
    try {
        SortMergeJoinHelper sortMergeJoinHelper = new SortMergeJoinHelper(tezClient);
        sortMergeJoinHelper.setConf(conf);
        String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-counter", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
        assertEquals(0, sortMergeJoinHelper.run(conf, args, tezClient));
        verifySortMergeJoinInput(outPath, expectedResults);

        String joinerVertexName = "joiner";
        String input1Name = "input1";
        String input2Name = "input2";
        String joinOutputName = "joinOutput";

        Set<StatusGetOpts> statusOpts = new HashSet<StatusGetOpts>();
        statusOpts.add(StatusGetOpts.GET_COUNTERS);
        VertexStatus joinerVertexStatus = sortMergeJoinHelper.dagClient.getVertexStatus(joinerVertexName, statusOpts);
        final TezCounters joinerCounters = joinerVertexStatus.getVertexCounters();

        // Per-IO groups are looked up as "<TaskCounter simple name>_<vertex>_INPUT_<input>"
        // (and "_OUTPUT_<output>" further down).
        final String perIoGroupPrefix = TaskCounter.class.getSimpleName() + "_" + joinerVertexName;
        final CounterGroup aggregatedGroup = joinerCounters.getGroup(TaskCounter.class.getCanonicalName());
        final CounterGroup input1Group = joinerCounters.getGroup(perIoGroupPrefix + "_INPUT_" + input1Name);
        final CounterGroup input2Group = joinerCounters.getGroup(perIoGroupPrefix + "_INPUT_" + input2Name);
        assertTrue("aggregated counter group cannot be empty", aggregatedGroup.size() > 0);
        assertTrue("per io group for input1 cannot be empty", input1Group.size() > 0);
        assertTrue("per io group for input2 cannot be empty", input2Group.size() > 0);

        List<TaskCounter> countersToVerifyAgg = Arrays.asList(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ, TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN, TaskCounter.COMBINE_INPUT_RECORDS, TaskCounter.MERGED_MAP_OUTPUTS, TaskCounter.NUM_DISK_TO_DISK_MERGES, TaskCounter.NUM_FAILED_SHUFFLE_INPUTS, TaskCounter.NUM_MEM_TO_DISK_MERGES, TaskCounter.NUM_SHUFFLED_INPUTS, TaskCounter.NUM_SKIPPED_INPUTS, TaskCounter.REDUCE_INPUT_GROUPS, TaskCounter.REDUCE_INPUT_RECORDS, TaskCounter.SHUFFLE_BYTES, TaskCounter.SHUFFLE_BYTES_DECOMPRESSED, TaskCounter.SHUFFLE_BYTES_DISK_DIRECT, TaskCounter.SHUFFLE_BYTES_TO_DISK, TaskCounter.SHUFFLE_BYTES_TO_MEM, TaskCounter.SPILLED_RECORDS);
        int nonZeroCounters = 0;
        // verify that the sum of the counter values for edges add up to the aggregated counter value.
        for (TaskCounter c : countersToVerifyAgg) {
            TezCounter aggregatedCounter = aggregatedGroup.findCounter(c.name(), false);
            TezCounter input1Counter = input1Group.findCounter(c.name(), false);
            TezCounter input2Counter = input2Group.findCounter(c.name(), false);
            assertNotNull("aggregated counter cannot be null " + c.name(), aggregatedCounter);
            assertNotNull("input1 counter cannot be null " + c.name(), input1Counter);
            assertNotNull("input2 counter cannot be null " + c.name(), input2Counter);
            assertEquals("aggregated counter does not match sum of input counters " + c.name(), aggregatedCounter.getValue(), input1Counter.getValue() + input2Counter.getValue());
            if (aggregatedCounter.getValue() > 0) {
                nonZeroCounters++;
            }
        }
        // ensure that at least one of the counters tested above were non-zero;
        // otherwise the equality checks above would pass trivially on all zeros.
        assertTrue("At least one of the counter should be non-zero. invalid test ", nonZeroCounters > 0);

        CounterGroup joinerOutputGroup = joinerCounters.getGroup(perIoGroupPrefix + "_OUTPUT_" + joinOutputName);
        String outputCounterName = TaskCounter.OUTPUT_RECORDS.name();
        TezCounter aggregateCounter = aggregatedGroup.findCounter(outputCounterName, false);
        TezCounter joinerOutputCounter = joinerOutputGroup.findCounter(outputCounterName, false);
        assertNotNull("aggregated counter cannot be null " + outputCounterName, aggregateCounter);
        assertNotNull("output counter cannot be null " + outputCounterName, joinerOutputCounter);
        assertTrue("counter value is zero. test is invalid", aggregateCounter.getValue() > 0);
        assertEquals("aggregated counter does not match sum of output counters " + outputCounterName, aggregateCounter.getValue(), joinerOutputCounter.getValue());
    } finally {
        // The client was started by this test, so this test is responsible for stopping it.
        tezClient.stop();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) CounterGroup(org.apache.tez.common.counters.CounterGroup) TezCounter(org.apache.tez.common.counters.TezCounter) TezCounters(org.apache.tez.common.counters.TezCounters) TezClient(org.apache.tez.client.TezClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TaskCounter(org.apache.tez.common.counters.TaskCounter) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 44 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testMultipleCommits_OnVertexSuccess.

/**
 * Runs {@code MultipleCommitsExample} with the
 * {@code CommitOnVertexSuccessOption} argument and verifies the commits under
 * each of the four output path prefixes (v1, v2, uv12, v3).
 *
 * <p>The number of outputs per prefix is drawn at random from 1..10 on every run.
 * The staging directory is always deleted from {@code remoteFs} afterwards.
 *
 * @throws Exception if the job run, commit verification, or cleanup fails
 */
@Test(timeout = 60000)
public void testMultipleCommits_OnVertexSuccess() throws Exception {
    Path stagingDirPath = new Path("/tmp/commit-staging-dir");
    Random rand = new Random();
    String v1OutputPathPrefix = "/tmp/commit-output-v1";
    int v1OutputNum = rand.nextInt(10) + 1;
    String v2OutputPathPrefix = "/tmp/commit-output-v2";
    int v2OutputNum = rand.nextInt(10) + 1;
    String uv12OutputPathPrefix = "/tmp/commit-output-uv12";
    int uv12OutputNum = rand.nextInt(10) + 1;
    String v3OutputPathPrefix = "/tmp/commit-output-v3";
    int v3OutputNum = rand.nextInt(10) + 1;

    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
    try {
        MultipleCommitsExample job = new MultipleCommitsExample();
        // Arguments are (prefix, count) pairs for v1, v2, uv12 and v3, followed by the commit option.
        String[] jobArgs = new String[] {
            v1OutputPathPrefix, String.valueOf(v1OutputNum),
            v2OutputPathPrefix, String.valueOf(v2OutputNum),
            uv12OutputPathPrefix, String.valueOf(uv12OutputNum),
            v3OutputPathPrefix, String.valueOf(v3OutputNum),
            MultipleCommitsExample.CommitOnVertexSuccessOption };
        // No TezClient is handed to the job (third argument is null), so there is
        // no session for this test to stop afterwards.
        Assert.assertEquals("MultipleCommitsExample failed", 0, job.run(tezConf, jobArgs, null));
        verifyCommits(v1OutputPathPrefix, v1OutputNum);
        verifyCommits(v2OutputPathPrefix, v2OutputNum);
        verifyCommits(uv12OutputPathPrefix, uv12OutputNum);
        verifyCommits(v3OutputPathPrefix, v3OutputNum);
    } finally {
        remoteFs.delete(stagingDirPath, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Random(java.util.Random) MultipleCommitsExample(org.apache.tez.mapreduce.examples.MultipleCommitsExample) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Example 45 with TezClient

use of org.apache.tez.client.TezClient in project tez by apache.

the class TestTezJobs method testVertexOrder.

/**
 * Submits the DAG built by {@code SimpleTestDAG.createDAGForVertexOrder}, polls
 * until it completes, and asserts that it succeeded and that the key set of
 * {@code getVertexProgress()} iterates in the expected order: v1/v2 (either
 * order) in positions 0-1, v3 in position 2, v4/v5 (either order) in positions
 * 3-4, and v6 last.
 *
 * @throws Exception if submission, status polling, or client shutdown fails
 */
@Test(timeout = 60000)
public void testVertexOrder() throws Exception {
    // Single source of truth for the poll interval so the log text cannot drift from the actual sleep.
    final long pollIntervalMs = 100L;
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    TezClient tezClient = TezClient.create("TestVertexOrder", tezConf);
    tezClient.start();
    try {
        // NOTE(review): this passes the enclosing class's `conf`, not the local `tezConf` -- confirm that is intended.
        DAG dag = SimpleTestDAG.createDAGForVertexOrder("dag1", conf);
        DAGClient dagClient = tezClient.submitDAG(dag);
        DAGStatus dagStatus = dagClient.getDAGStatus(null);
        while (!dagStatus.isCompleted()) {
            LOG.info("Waiting for dag to complete. Sleeping for " + pollIntervalMs + "ms." + " DAG name: " + dag.getName() + " DAG context: " + dagClient.getExecutionContext() + " Current state: " + dagStatus.getState());
            Thread.sleep(pollIntervalMs);
            dagStatus = dagClient.getDAGStatus(null);
        }
        Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
        // verify vertex order
        Set<String> resultVertices = dagStatus.getVertexProgress().keySet();
        Assert.assertEquals(6, resultVertices.size());
        int i = 0;
        for (String vertexName : resultVertices) {
            if (i <= 1) {
                Assert.assertTrue("unexpected vertex at position " + i + ": " + vertexName, "v1".equals(vertexName) || "v2".equals(vertexName));
            } else if (i == 2) {
                Assert.assertEquals("unexpected vertex at position " + i, "v3", vertexName);
            } else if (i <= 4) {
                Assert.assertTrue("unexpected vertex at position " + i + ": " + vertexName, "v4".equals(vertexName) || "v5".equals(vertexName));
            } else {
                Assert.assertEquals("unexpected vertex at position " + i, "v6", vertexName);
            }
            i++;
        }
    } finally {
        // tezClient is already dereferenced by start() above, so it cannot be null here.
        tezClient.stop();
    }
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) MultiAttemptDAG(org.apache.tez.test.dag.MultiAttemptDAG) DAG(org.apache.tez.dag.api.DAG) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Aggregations

TezClient (org.apache.tez.client.TezClient): 58; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 44; Test (org.junit.Test): 38; Path (org.apache.hadoop.fs.Path): 34; DAG (org.apache.tez.dag.api.DAG): 32; DAGClient (org.apache.tez.dag.api.client.DAGClient): 29; DAGStatus (org.apache.tez.dag.api.client.DAGStatus): 18; Vertex (org.apache.tez.dag.api.Vertex): 15; SleepProcessor (org.apache.tez.runtime.library.processor.SleepProcessor): 12; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 10; SleepProcessorConfig (org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig): 9; LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 8; FileSystem (org.apache.hadoop.fs.FileSystem): 7; IOException (java.io.IOException): 6; Random (java.util.Random): 6; TezException (org.apache.tez.dag.api.TezException): 6; UserPayload (org.apache.tez.dag.api.UserPayload): 6; YarnApplicationState (org.apache.hadoop.yarn.api.records.YarnApplicationState): 5; HashMap (java.util.HashMap): 4; ExecutionException (java.util.concurrent.ExecutionException): 4