Use of org.apache.tez.dag.history.DAGHistoryEvent in the Apache Tez project.
Class TestRecoveryService, method testRecoveryFlushOnSummaryEvent.
/**
 * Checks how often {@code hflush()} is invoked on the summary and DAG output streams
 * when both DAG_RECOVERY_MAX_UNFLUSHED_EVENTS and DAG_RECOVERY_FLUSH_INTERVAL_SECS
 * are configured as "-1".
 */
@Test(timeout = 5000)
public void testRecoveryFlushOnSummaryEvent() throws Exception {
  String[][] flushSettings = new String[][] {
      { TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, "-1" },
      { TezConfiguration.DAG_RECOVERY_FLUSH_INTERVAL_SECS, "-1" } };
  setup(true, flushSettings);
  recoveryService.start();

  DAGPlan plan = DAGPlan.newBuilder().setName("test_dag").build();

  // Submitting the DAG: afterwards each stream must have been flushed exactly once.
  DAGSubmittedEvent submitted = new DAGSubmittedEvent(
      dagId, startTime, plan, appAttemptId, null, "nobody", conf, null, "default");
  recoveryService.handle(new DAGHistoryEvent(dagId, submitted));
  waitForDrain(-1);
  verify(summaryFos, times(1)).hflush();
  verify(dagFos, times(1)).hflush();

  // Commit-started: the summary stream gets a second flush, while the DAG stream's
  // flush count is expected to stay at one.
  DAGCommitStartedEvent commitStarted = new DAGCommitStartedEvent(dagId, startTime);
  recoveryService.handle(new DAGHistoryEvent(dagId, commitStarted));
  waitForDrain(-1);
  verify(summaryFos, times(2)).hflush();
  verify(dagFos, times(1)).hflush();

  // Stopping the service must produce the second flush of the DAG stream.
  recoveryService.stop();
  verify(dagFos, times(2)).hflush();
}
Use of org.apache.tez.dag.history.DAGHistoryEvent in the Apache Tez project.
Class TestRecoveryService, method testSummaryPathExisted.
/**
 * Touches the summary path after the service has started, then expects handling a
 * DAGFinishedEvent to flip {@code hasRecoveryFailed()} from false to true. A second
 * event is sent afterwards to confirm the service still accepts events.
 */
@Test(timeout = 5000)
public void testSummaryPathExisted() throws Exception {
  setup(false, null);
  recoveryService.start();
  touchFile(summaryPath);

  // No failure is reported before any event has been handled.
  assertFalse(recoveryService.hasRecoveryFailed());

  DAGFinishedEvent firstFinish = new DAGFinishedEvent(
      dagId, 1L, 2L, DAGState.ERROR, "diag", null, "user", "dag1", null, appAttemptId, null);
  recoveryService.handle(new DAGHistoryEvent(dagId, firstFinish));
  assertTrue(recoveryService.hasRecoveryFailed());

  // The service must still be able to take an event once recovery has failed.
  DAGFinishedEvent secondFinish = new DAGFinishedEvent(
      dagId, 1L, 2L, DAGState.ERROR, "diag", null, "user", "dag1", null, appAttemptId, null);
  recoveryService.handle(new DAGHistoryEvent(dagId, secondFinish));

  recoveryService.stop();
}
Use of org.apache.tez.dag.history.DAGHistoryEvent in the Apache Tez project.
Class TestRecoveryService, method testRecoveryPathExisted.
/**
 * Touches the DAG recovery path after the service has started, then expects that
 * handling a TaskStartedEvent (and awaiting the service) leaves
 * {@code hasRecoveryFailed()} true. A further event is sent afterwards to confirm the
 * service still accepts events.
 */
@Test(timeout = 5000)
public void testRecoveryPathExisted() throws Exception {
  setup(false, null);
  recoveryService.start();
  touchFile(dagRecoveryPath);

  // No failure is reported before any event has been handled.
  assertFalse(recoveryService.hasRecoveryFailed());

  TaskStartedEvent firstStart = new TaskStartedEvent(tezTaskId, "v1", 0L, 0L);
  recoveryService.handle(new DAGHistoryEvent(dagId, firstStart));
  // Block until the service has processed the event before checking the flag.
  recoveryService.await();
  assertTrue(recoveryService.hasRecoveryFailed());

  // The service must still be able to take a recovery event after the fatal error.
  TaskStartedEvent secondStart = new TaskStartedEvent(tezTaskId, "v1", 0L, 0L);
  recoveryService.handle(new DAGHistoryEvent(dagId, secondStart));

  recoveryService.stop();
}
Use of org.apache.tez.dag.history.DAGHistoryEvent in the Apache Tez project.
Class TestRecoveryParser, method testRecoverableSummary_VertexGroupFinishCommitting.
/**
 * Writes a DAG submission, a vertex-group commit start/finish pair and a
 * VertexFinishedEvent for each group member through a real RecoveryService, then
 * parses the result and expects the DAG to be reported as recoverable.
 */
@Test(timeout = 5000)
public void testRecoverableSummary_VertexGroupFinishCommitting() throws IOException {
  ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
  TezDAGID submittedDagId = TezDAGID.getInstance(applicationId, 1);

  // Stub the application context that the recovery service is constructed with.
  AppContext mockedContext = mock(AppContext.class);
  when(mockedContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
  when(mockedContext.getClock()).thenReturn(new SystemClock());
  when(mockDAGImpl.getID()).thenReturn(submittedDagId);
  when(mockedContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
  when(mockedContext.getApplicationID()).thenReturn(applicationId);

  RecoveryService service = new RecoveryService(mockedContext);
  Configuration serviceConf = new Configuration();
  serviceConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
  service.init(serviceConf);
  service.start();

  DAGPlan plan = TestDAGImpl.createTestDAGPlan();

  // A DAGSubmittedEvent has to go first so that the summary stream is initialized.
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 1);
  DAGSubmittedEvent submitted = new DAGSubmittedEvent(
      submittedDagId, 1L, plan, attemptId, null, "user", new Configuration(), null, null);
  service.handle(new DAGHistoryEvent(submittedDagId, submitted));

  // Skipping the remaining lifecycle events is acceptable for this test.
  TezVertexID firstVertex = TezVertexID.getInstance(submittedDagId, 0);
  TezVertexID secondVertex = TezVertexID.getInstance(submittedDagId, 1);

  VertexGroupCommitStartedEvent groupCommitStarted = new VertexGroupCommitStartedEvent(
      submittedDagId, "group_1", Lists.newArrayList(firstVertex, secondVertex), 0L);
  service.handle(new DAGHistoryEvent(submittedDagId, groupCommitStarted));

  VertexGroupCommitFinishedEvent groupCommitFinished = new VertexGroupCommitFinishedEvent(
      submittedDagId, "group_1", Lists.newArrayList(firstVertex, secondVertex), 0L);
  service.handle(new DAGHistoryEvent(submittedDagId, groupCommitFinished));

  // VertexFinishedEvents are written as well; without them the DAG would still be
  // treated as non-recoverable when checked against the non-summary events.
  VertexFinishedEvent firstFinished = new VertexFinishedEvent(
      firstVertex, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
  service.handle(new DAGHistoryEvent(submittedDagId, firstFinished));

  VertexFinishedEvent secondFinished = new VertexFinishedEvent(
      secondVertex, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null);
  service.handle(new DAGHistoryEvent(submittedDagId, secondFinished));

  service.stop();

  DAGRecoveryData recovered = parser.parseRecoveryData();
  assertEquals(submittedDagId, recovered.recoveredDagID);
  assertFalse(recovered.nonRecoverable);
}
Use of org.apache.tez.dag.history.DAGHistoryEvent in the Apache Tez project.
Class TestRecoveryParser, method testRecoverableNonSummary2.
/**
 * Writes a DAG submission plus a vertex-group commit start/finish pair through a
 * MockRecoveryService, then parses the result and expects the DAG to be flagged as
 * non-recoverable with a reason about missing full recovery events for a vertex
 * committed as a member of a vertex group.
 *
 * @throws IOException declared by the test signature; presumably propagated from
 *     {@code parser.parseRecoveryData()} (confirm against RecoveryParser)
 */
@Test(timeout = 5000)
public void testRecoverableNonSummary2() throws IOException {
  ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
  TezDAGID dagID = TezDAGID.getInstance(appId, 1);
  AppContext appContext = mock(AppContext.class);
  when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
  when(appContext.getClock()).thenReturn(new SystemClock());
  when(mockDAGImpl.getID()).thenReturn(dagID);
  // MockRecoveryService will skip the non-summary event
  MockRecoveryService rService = new MockRecoveryService(appContext);
  Configuration conf = new Configuration();
  conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
  rService.init(conf);
  rService.start();
  DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
  // write a DAGSubmittedEvent first to initialize summaryStream
  rService.handle(new DAGHistoryEvent(dagID,
      new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1),
          null, "user", new Configuration(), null, null)));
  // It should be fine to skip other events, just for testing.
  // Look up the two group members once and reuse them, instead of keeping an unused
  // local and repeating the getInstance calls inline.
  TezVertexID v0 = TezVertexID.getInstance(dagID, 0);
  TezVertexID v1 = TezVertexID.getInstance(dagID, 1);
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexGroupCommitStartedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L)));
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexGroupCommitFinishedEvent(dagID, "group_1", Lists.newArrayList(v0, v1), 0L)));
  rService.stop();
  DAGRecoveryData dagData = parser.parseRecoveryData();
  assertTrue(dagData.nonRecoverable);
  assertTrue(dagData.reason.contains("Vertex has been committed as member of vertex group" + ", but its full recovery events are not seen"));
}
Aggregations