Search in sources :

Example 6 with HistoryEvent

use of org.apache.tez.dag.history.HistoryEvent in project tez by apache.

the class ATSV15HistoryLoggingService method getDomainForEvent.

/**
 * Resolves the domain id to use for the given event.
 *
 * <p>The session domain id is returned when no ACL policy manager is set, when the
 * event carries no DAG id, when the event type is not DAG specific, or when no
 * per-DAG domain is known and the event is neither DAG_SUBMITTED nor DAG_RECOVERED.
 * Otherwise the per-DAG domain is looked up in (or created and stored into)
 * {@code dagDomainIdMap}.
 *
 * @param event the DAG history event to resolve a domain for
 * @return the domain id for the event, or {@code null} when the DAG is listed in
 *         {@code skippedDAGs} after the domain creation attempt
 */
private String getDomainForEvent(DAGHistoryEvent event) {
    final String sessionDomain = sessionDomainId;
    if (historyACLPolicyManager == null) {
        return sessionDomain;
    }

    final TezDAGID dagId = event.getDagID();
    final HistoryEvent historyEvent = event.getHistoryEvent();
    if (dagId == null) {
        return sessionDomain;
    }
    final HistoryEventType eventType = historyEvent.getEventType();
    if (!HistoryEventType.isDAGSpecificEvent(eventType)) {
        return sessionDomain;
    }

    // A domain was already recorded for this DAG: reuse it.
    if (dagDomainIdMap.containsKey(dagId)) {
        final String knownDomain = dagDomainIdMap.get(dagId);
        // DAG_FINISHED is treated as the last event, so drop the mapping.
        if (eventType == HistoryEventType.DAG_FINISHED) {
            dagDomainIdMap.remove(dagId);
        }
        return knownDomain;
    }

    final boolean submitted = eventType == HistoryEventType.DAG_SUBMITTED;
    final boolean recovered = eventType == HistoryEventType.DAG_RECOVERED;
    if (!submitted && !recovered) {
        return sessionDomain;
    }

    // First event seen for this DAG: gather conf and plan, then create the DAG domain.
    final Configuration dagConf;
    final DAGPlan plan;
    if (submitted) {
        final DAGSubmittedEvent submittedEvent = (DAGSubmittedEvent) historyEvent;
        dagConf = submittedEvent.getConf();
        plan = submittedEvent.getDAGPlan();
    } else {
        dagConf = appContext.getCurrentDAG().getConf();
        plan = appContext.getCurrentDAG().getJobPlan();
    }
    final String createdDomain = createDagDomain(dagConf, plan, dagId);

    // createDagDomain updates skippedDAGs, so it has to be re-checked here.
    if (skippedDAGs.contains(dagId)) {
        return null;
    }
    dagDomainIdMap.put(dagId, createdDomain);
    return createdDomain;
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Example 7 with HistoryEvent

use of org.apache.tez.dag.history.HistoryEvent in project tez by apache.

the class TestHistoryEventsProtoConversion method testSummaryProtoConversion.

/**
 * Round-trips a summary event through its summary proto form.
 *
 * <p>The event is written with {@code toSummaryProtoStream} into an in-memory buffer,
 * parsed back as a delimited {@code SummaryEventProto}, and loaded into a fresh
 * instance of the same class created via {@code ReflectionUtils}.
 *
 * @param historyEvent the event to convert; it is cast to {@code SummaryEvent}
 * @return the newly created instance populated from the parsed proto
 */
private HistoryEvent testSummaryProtoConversion(HistoryEvent historyEvent) throws IOException, TezException {
    final SummaryEvent source = (SummaryEvent) historyEvent;

    // Serialize into memory.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    source.toSummaryProtoStream(buffer);
    buffer.flush();
    buffer.close();
    final byte[] payload = buffer.toByteArray();
    LOG.info("Serialized event to byte array" + ", eventType=" + historyEvent.getEventType() + ", bufLen=" + payload.length);

    // Parse the bytes back and populate a new instance of the same event class.
    final SummaryEventProto parsedProto = SummaryEventProto.parseDelimitedFrom(new ByteArrayInputStream(payload));
    final HistoryEvent restored = ReflectionUtils.createClazzInstance(source.getClass().getName());
    ((SummaryEvent) restored).fromSummaryProtoStream(parsedProto);
    return restored;
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) SummaryEvent(org.apache.tez.dag.history.SummaryEvent) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) SummaryEventProto(org.apache.tez.dag.recovery.records.RecoveryProtos.SummaryEventProto)

Example 8 with HistoryEvent

use of org.apache.tez.dag.history.HistoryEvent in project tez by apache.

the class TestRecovery method testOrderedWordCount.

/**
 * Runs {@code OrderedWordCount} on the mini cluster with the recovery service hook
 * configured from {@code shutdownCondition}, verifies the job output, and then checks
 * that the last recovery event read back (with attempt argument 1) matches the
 * shutdown condition.
 *
 * @param shutdownCondition     condition serialized into the hook configuration and
 *                              compared against the last recovery event
 * @param enableAutoParallelism value for the shuffle vertex manager auto-parallel flag
 * @param generateSplitInClient when true, "-generateSplitInClient" is passed to the job
 */
private void testOrderedWordCount(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
    LOG.info("shutdownCondition:" + shutdownCondition.getEventType() + ", event=" + shutdownCondition.getEvent());

    // Directories and generated input on the remote file system.
    final String inputLocation = "/tmp/owc-input/";
    final String outputLocation = "/tmp/owc-output/";
    final Path inputPath = new Path(inputLocation);
    final Path stagingPath = new Path("/tmp/owc-staging-dir");
    remoteFs.mkdirs(inputPath);
    remoteFs.mkdirs(stagingPath);
    TestTezJobs.generateOrderedWordCountInput(inputPath, remoteFs);

    // Job configuration: recovery hook, shutdown condition, staging dir, logging.
    final TezConfiguration conf = new TezConfiguration(miniTezCluster.getConfig());
    conf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
    conf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS, RecoveryServiceWithEventHandlingHook.class.getName());
    conf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS, SimpleRecoveryEventHook.class.getName());
    conf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
    conf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism);
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingPath.toString());
    conf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
    conf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");

    // Run the job; the only difference between the two modes is the leading flag.
    final OrderedWordCount wordCount = new OrderedWordCount();
    final String[] jobArgs;
    if (generateSplitInClient) {
        jobArgs = new String[] { "-generateSplitInClient", inputLocation, outputLocation, "5" };
    } else {
        jobArgs = new String[] { inputLocation, outputLocation, "5" };
    }
    Assert.assertTrue("OrderedWordCount failed", wordCount.run(conf, jobArgs, null) == 0);
    TestTezJobs.verifyOutput(new Path(outputLocation), remoteFs);

    // The last recovery event read back must be the one the shutdown condition targets.
    final List<HistoryEvent> recoveredEvents = RecoveryParser.readRecoveryEvents(conf, wordCount.getAppId(), 1);
    final HistoryEvent finalEvent = recoveredEvents.get(recoveredEvents.size() - 1);
    assertEquals(shutdownCondition.getEvent().getEventType(), finalEvent.getEventType());
    assertTrue(shutdownCondition.match(finalEvent));
}
Also used : Path(org.apache.hadoop.fs.Path) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) SimpleRecoveryEventHook(org.apache.tez.test.RecoveryServiceWithEventHandlingHook.SimpleRecoveryEventHook) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OrderedWordCount(org.apache.tez.examples.OrderedWordCount)

Example 9 with HistoryEvent

use of org.apache.tez.dag.history.HistoryEvent in project tez by apache.

the class TestRecovery method testHashJoinExample.

/**
 * Runs {@code HashJoinExample} on the mini cluster with the recovery service hook
 * configured from {@code shutdownCondition}, checks the join output against the
 * expected terms, and then checks that the last recovery event read back (with
 * attempt argument 1) matches the shutdown condition.
 *
 * <p>All streams, writers and readers are opened in try-with-resources so they are
 * released even when a write, read or assertion fails part-way through.
 *
 * @param shutdownCondition     condition serialized into the hook configuration and
 *                              compared against the last recovery event
 * @param enableAutoParallelism value for the shuffle vertex manager auto-parallel flag
 * @param generateSplitInClient when true, "-generateSplitInClient" is passed to the job
 */
private void testHashJoinExample(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
    HashJoinExample hashJoinExample = new HashJoinExample();
    TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
    tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
    tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS, RecoveryServiceWithEventHandlingHook.class.getName());
    tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS, SimpleRecoveryEventHook.class.getName());
    tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
    tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism);
    tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
    tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
    tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");
    hashJoinExample.setConf(tezConf);
    Path stagingDirPath = new Path("/tmp/tez-staging-dir");
    Path inPath1 = new Path("/tmp/hashJoin/inPath1");
    Path inPath2 = new Path("/tmp/hashJoin/inPath2");
    Path outPath = new Path("/tmp/hashJoin/outPath");
    remoteFs.delete(outPath, true);
    remoteFs.mkdirs(inPath1);
    remoteFs.mkdirs(inPath2);
    remoteFs.mkdirs(stagingDirPath);

    // Input 1 gets term0..term19; input 2 gets only the even-numbered terms, which
    // are therefore the terms expected in the join output.
    Set<String> expectedResult = new HashSet<String>();
    try (FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
        FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
        BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
        BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2))) {
        for (int i = 0; i < 20; i++) {
            String term = "term" + i;
            writer1.write(term);
            writer1.newLine();
            if (i % 2 == 0) {
                writer2.write(term);
                writer2.newLine();
                expectedResult.add(term);
            }
        }
    }

    String[] args;
    if (generateSplitInClient) {
        args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-generateSplitInClient", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
    } else {
        args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
    }
    assertEquals(0, hashJoinExample.run(args));

    // Ignore entries whose names start with "_" or "."; exactly one output file must remain.
    FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {

        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    });
    assertEquals(1, statuses.length);

    // Every output line must be an expected term, and every expected term must appear.
    try (FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
        BufferedReader reader = new BufferedReader(new InputStreamReader(inStream))) {
        String line;
        while ((line = reader.readLine()) != null) {
            assertTrue(expectedResult.remove(line));
        }
    }
    assertEquals(0, expectedResult.size());

    // The last recovery event read back must be the one the shutdown condition targets.
    List<HistoryEvent> historyEventsOfAttempt1 = RecoveryParser.readRecoveryEvents(tezConf, hashJoinExample.getAppId(), 1);
    HistoryEvent lastEvent = historyEventsOfAttempt1.get(historyEventsOfAttempt1.size() - 1);
    assertEquals(shutdownCondition.getEvent().getEventType(), lastEvent.getEventType());
    assertTrue(shutdownCondition.match(lastEvent));
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) SimpleRecoveryEventHook(org.apache.tez.test.RecoveryServiceWithEventHandlingHook.SimpleRecoveryEventHook) BufferedWriter(java.io.BufferedWriter) HashJoinExample(org.apache.tez.examples.HashJoinExample) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) OutputStreamWriter(java.io.OutputStreamWriter) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashSet(java.util.HashSet)

Example 10 with HistoryEvent

use of org.apache.tez.dag.history.HistoryEvent in project tez by apache.

the class TestAMRecovery method testVertexCompletelyFinished_ScatterGather.

/**
 * Fine-grained, task-level recovery (SCATTER_GATHER). In a vertex (v1), task 0
 * and task 1 are both done, a history flush happens, and then the AM dies. Once
 * the AM is recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_ScatterGather() throws Exception {
    DAG dag = createDAG("VertexCompletelyFinished_ScatterGather", ControlledShuffleVertexManager.class, DataMovementType.SCATTER_GATHER, false);
    TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
    assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
    assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());
    TezCounter outputCounter = counters.findCounter(TestOutput.COUNTER_NAME, TestOutput.COUNTER_NAME);
    TezCounter inputCounter = counters.findCounter(TestInput.COUNTER_NAME, TestInput.COUNTER_NAME);
    // verify that processor, input and output counters, are all being collected
    Assert.assertTrue(outputCounter.getValue() > 0);
    Assert.assertTrue(inputCounter.getValue() > 0);
    List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
    List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
    printHistoryEvents(historyEvents1, 1);
    // Print the events of attempt 2 under the attempt 2 label (previously the
    // attempt 1 events were printed a second time here).
    printHistoryEvents(historyEvents2, 2);
    // The recovery log of attempt 1 must hold exactly one finished event for each
    // of the two lookups (0, 0) and (0, 1) -- presumably task_0 and task_1 of v1.
    assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
    assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());
    // The recovery log of attempt 2 must likewise hold exactly one finished event
    // for each of the same two lookups.
    assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
    assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
Also used : DAG(org.apache.tez.dag.api.DAG) TezCounter(org.apache.tez.common.counters.TezCounter) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TezCounters(org.apache.tez.common.counters.TezCounters) Test(org.junit.Test)

Aggregations

HistoryEvent (org.apache.tez.dag.history.HistoryEvent): 23; Test (org.junit.Test): 10; Path (org.apache.hadoop.fs.Path): 6; TezCounters (org.apache.tez.common.counters.TezCounters): 6; DAG (org.apache.tez.dag.api.DAG): 6; DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent): 6; Configuration (org.apache.hadoop.conf.Configuration): 5; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 5; HistoryEventType (org.apache.tez.dag.history.HistoryEventType): 5; TaskFinishedEvent (org.apache.tez.dag.history.events.TaskFinishedEvent): 5; ArrayList (java.util.ArrayList): 4; TezDAGID (org.apache.tez.dag.records.TezDAGID): 4; IOException (java.io.IOException): 3; FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 3; FileSystem (org.apache.hadoop.fs.FileSystem): 3; DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent): 3; AMLaunchedEvent (org.apache.tez.dag.history.events.AMLaunchedEvent): 3; AMStartedEvent (org.apache.tez.dag.history.events.AMStartedEvent): 3; TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent): 3; TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent): 3