Search in sources :

Example 46 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

the class TestRecoveryService method testRecoveryFlushOnSummaryEvent.

/**
 * Checks how often the summary and DAG recovery streams are hflush()-ed when both
 * DAG_RECOVERY_MAX_UNFLUSHED_EVENTS and DAG_RECOVERY_FLUSH_INTERVAL_SECS are set to "-1".
 *
 * <p>Expected flush counts, as asserted below: after the DAG-submitted event each stream
 * has been flushed once; after the commit-started event only the summary stream has been
 * flushed again; after stop() the DAG stream has been flushed a second time.
 */
@Test(timeout = 5000)
public void testRecoveryFlushOnSummaryEvent() throws Exception {
    String[][] flushOverrides = {
        { TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, "-1" },
        { TezConfiguration.DAG_RECOVERY_FLUSH_INTERVAL_SECS, "-1" }
    };
    setup(true, flushOverrides);
    recoveryService.start();

    // Step 1: the submitted event reaches both streams right away (one flush each).
    DAGPlan plan = DAGPlan.newBuilder().setName("test_dag").build();
    DAGSubmittedEvent submitted =
        new DAGSubmittedEvent(dagId, startTime, plan, appAttemptId, null, "nobody", conf, null, "default");
    recoveryService.handle(new DAGHistoryEvent(dagId, submitted));
    waitForDrain(-1);
    verify(summaryFos, times(1)).hflush();
    verify(dagFos, times(1)).hflush();

    // Step 2: the commit-started event bumps the summary flush count to 2,
    // while the DAG stream flush count stays at 1.
    DAGCommitStartedEvent commitStarted = new DAGCommitStartedEvent(dagId, startTime);
    recoveryService.handle(new DAGHistoryEvent(dagId, commitStarted));
    waitForDrain(-1);
    verify(summaryFos, times(2)).hflush();
    verify(dagFos, times(1)).hflush();

    // Step 3: stopping the service flushes the DAG stream once more.
    recoveryService.stop();
    verify(dagFos, times(2)).hflush();
}
Also used : DAGCommitStartedEvent(org.apache.tez.dag.history.events.DAGCommitStartedEvent) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) Test(org.junit.Test)

Example 47 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

the class TestRecoveryService method testSummaryPathExisted.

/**
 * Calls touchFile on the summary path after the service has started, then sends a
 * DAGFinishedEvent and expects hasRecoveryFailed() to flip from false to true.
 * A second DAGFinishedEvent sent afterwards must still be accepted without throwing.
 */
@Test(timeout = 5000)
public void testSummaryPathExisted() throws Exception {
    setup(false, null);
    recoveryService.start();
    touchFile(summaryPath);

    // Nothing has been handled yet, so no failure is reported.
    assertFalse(recoveryService.hasRecoveryFailed());

    DAGFinishedEvent firstFinish =
        new DAGFinishedEvent(dagId, 1L, 2L, DAGState.ERROR, "diag", null, "user", "dag1", null, appAttemptId, null);
    recoveryService.handle(new DAGHistoryEvent(dagId, firstFinish));
    // No await() here: the failure flag is checked directly after handle() returns.
    assertTrue(recoveryService.hasRecoveryFailed());

    // The service must keep accepting events once it is in the failed state.
    DAGFinishedEvent secondFinish =
        new DAGFinishedEvent(dagId, 1L, 2L, DAGState.ERROR, "diag", null, "user", "dag1", null, appAttemptId, null);
    recoveryService.handle(new DAGHistoryEvent(dagId, secondFinish));

    recoveryService.stop();
}
Also used : DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DAGFinishedEvent(org.apache.tez.dag.history.events.DAGFinishedEvent) Test(org.junit.Test)

Example 48 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

the class TestRecoveryService method testRecoveryPathExisted.

/**
 * Calls touchFile on the DAG recovery path after the service has started, then sends a
 * TaskStartedEvent, waits via await(), and expects hasRecoveryFailed() to flip from
 * false to true. A second TaskStartedEvent sent afterwards must still be accepted
 * without throwing.
 */
@Test(timeout = 5000)
public void testRecoveryPathExisted() throws Exception {
    setup(false, null);
    recoveryService.start();
    touchFile(dagRecoveryPath);

    // Nothing has been handled yet, so no failure is reported.
    assertFalse(recoveryService.hasRecoveryFailed());

    TaskStartedEvent firstStart = new TaskStartedEvent(tezTaskId, "v1", 0L, 0L);
    recoveryService.handle(new DAGHistoryEvent(dagId, firstStart));
    // Block until the event has been processed before inspecting the failure flag.
    recoveryService.await();
    assertTrue(recoveryService.hasRecoveryFailed());

    // The service must keep accepting recovery events once it is in the failed state.
    TaskStartedEvent secondStart = new TaskStartedEvent(tezTaskId, "v1", 0L, 0L);
    recoveryService.handle(new DAGHistoryEvent(dagId, secondStart));

    recoveryService.stop();
}
Also used : DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) Test(org.junit.Test)

Example 49 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

the class TestRecoveryParser method testRecoverableSummary_VertexGroupFinishCommitting.

/**
 * Writes a DAG-submitted event, a started/finished vertex-group commit pair for "group_1",
 * and a VertexFinishedEvent for each member vertex through a real RecoveryService, then
 * parses the recovery data and expects the DAG to be reported as recoverable.
 */
@Test(timeout = 5000)
public void testRecoverableSummary_VertexGroupFinishCommitting() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);

    // Stub the application context that RecoveryService is constructed with.
    AppContext appContext = mock(AppContext.class);
    Path attemptRecoveryDir = new Path(recoveryPath + "/1");
    when(appContext.getCurrentRecoveryDir()).thenReturn(attemptRecoveryDir);
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appId);
    when(mockDAGImpl.getID()).thenReturn(dagID);

    RecoveryService rService = new RecoveryService(appContext);
    Configuration serviceConf = new Configuration();
    // Presumably makes stop() drain queued events before returning -- confirm in RecoveryService.
    serviceConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(serviceConf);
    rService.start();

    // The DAGSubmittedEvent goes first so that the summary stream gets initialized.
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
    DAGSubmittedEvent submitted =
        new DAGSubmittedEvent(dagID, 1L, dagPlan, attemptId, null, "user", new Configuration(), null, null);
    rService.handle(new DAGHistoryEvent(dagID, submitted));

    // Intermediate events are deliberately omitted; they are not needed for this check.
    TezVertexID v0 = TezVertexID.getInstance(dagID, 0);
    TezVertexID v1 = TezVertexID.getInstance(dagID, 1);
    VertexGroupCommitStartedEvent groupCommitStarted =
        new VertexGroupCommitStartedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L);
    rService.handle(new DAGHistoryEvent(dagID, groupCommitStarted));
    VertexGroupCommitFinishedEvent groupCommitFinished =
        new VertexGroupCommitFinishedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L);
    rService.handle(new DAGHistoryEvent(dagID, groupCommitFinished));

    // Without the VertexFinishedEvents the DAG would still be judged non-recoverable
    // once the non-summary events are checked.
    // NOTE(review): both events pass the vertex name "v1", including the one for v0;
    // kept exactly as in the original -- confirm whether "v0" was intended.
    VertexFinishedEvent v0Finished =
        new VertexFinishedEvent(v0, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v0Finished));
    VertexFinishedEvent v1Finished =
        new VertexFinishedEvent(v1, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v1Finished));
    rService.stop();

    DAGRecoveryData dagData = parser.parseRecoveryData();
    assertEquals(dagID, dagData.recoveredDagID);
    assertFalse(dagData.nonRecoverable);
}
Also used : Path(org.apache.hadoop.fs.Path) RecoveryService(org.apache.tez.dag.history.recovery.RecoveryService) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Configuration(org.apache.hadoop.conf.Configuration) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) TezVertexID(org.apache.tez.dag.records.TezVertexID) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Example 50 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

the class TestRecoveryParser method testRecoverableNonSummary2.

/**
 * Writes a DAG-submitted event followed by a started/finished vertex-group commit pair
 * for "group_1" through a MockRecoveryService, then parses the recovery data and expects
 * the DAG to be reported as non-recoverable with a reason mentioning the missing vertex
 * recovery events.
 */
@Test(timeout = 5000)
public void testRecoverableNonSummary2() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(dagID);
    // MockRecoveryService will skip the non-summary event
    // (per the original author's note; the mock's implementation is not shown here).
    MockRecoveryService rService = new MockRecoveryService(appContext);
    Configuration conf = new Configuration();
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(conf);
    rService.start();
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    // write a DAGSubmittedEvent first to initialize summaryStream
    rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
    // It should be fine to skip other events, just for testing.
    // The two group members are named once and reused in both commit events, replacing
    // the previously unused vertexId local and the repeated inline getInstance calls.
    TezVertexID v0 = TezVertexID.getInstance(dagID, 0);
    TezVertexID v1 = TezVertexID.getInstance(dagID, 1);
    rService.handle(new DAGHistoryEvent(dagID, new VertexGroupCommitStartedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L)));
    rService.handle(new DAGHistoryEvent(dagID, new VertexGroupCommitFinishedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L)));
    rService.stop();
    DAGRecoveryData dagData = parser.parseRecoveryData();
    // No VertexFinishedEvent was written, so the parser is expected to reject recovery.
    assertTrue(dagData.nonRecoverable);
    assertTrue(dagData.reason.contains("Vertex has been committed as member of vertex group" + ", but its full recovery events are not seen"));
}
Also used : Path(org.apache.hadoop.fs.Path) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Configuration(org.apache.hadoop.conf.Configuration) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) TezVertexID(org.apache.tez.dag.records.TezVertexID) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Aggregations

DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent): 81; TezDAGID (org.apache.tez.dag.records.TezDAGID): 38; Test (org.junit.Test): 33; DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent): 21; IOException (java.io.IOException): 18; Configuration (org.apache.hadoop.conf.Configuration): 18; DAGPlan (org.apache.tez.dag.api.records.DAGProtos.DAGPlan): 18; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 17; Path (org.apache.hadoop.fs.Path): 15; SystemClock (org.apache.hadoop.yarn.util.SystemClock): 14; DAGRecoveryData (org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData): 13; DAGStartedEvent (org.apache.tez.dag.history.events.DAGStartedEvent): 11; RecoveryService (org.apache.tez.dag.history.recovery.RecoveryService): 11; TezVertexID (org.apache.tez.dag.records.TezVertexID): 10; TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent): 7; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 6; TimelineEntity (org.apache.hadoop.yarn.api.records.timeline.TimelineEntity): 6; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 6; DAGFinishedEvent (org.apache.tez.dag.history.events.DAGFinishedEvent): 6; VertexFinishedEvent (org.apache.tez.dag.history.events.VertexFinishedEvent): 6