Search in sources :

Example 1 with TaskRecoveryData

use of org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData in project tez by apache.

the class TestDAGRecovery method testVertexRecoverFromStart.

/**
 * RecoveryEvents:
 *  DAG:  DAGInitedEvent -> DAGStartedEvent
 *  V1:   VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent
 *
 * V1 skip initialization.
 */
@Test(timeout = 5000)
public void testVertexRecoverFromStart() {
    initMockDAGRecoveryDataForVertex();
    List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>();
    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, inputGeneratedTezEvents, null);
    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, null, true);
    VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
    VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent, v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap<TezTaskID, TaskRecoveryData>(), false);
    doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
    DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
    dag.handle(recoveryEvent);
    dispatcher.await();
    VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
    VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
    VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
    assertEquals(DAGState.RUNNING, dag.getState());
    // v1 skip initialization
    assertEquals(VertexState.RUNNING, v1.getState());
    assertEquals(v1InitedTime, v1.initedTime);
    assertEquals(v1StartedTime, v1.startedTime);
    assertEquals(v1NumTask, v1.getTotalTasks());
    assertEquals(VertexState.RUNNING, v2.getState());
    assertEquals(VertexState.RUNNING, v3.getState());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) ArrayList(java.util.ArrayList) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 2 with TaskRecoveryData

use of org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData in project tez by apache.

the class TestDAGRecovery method initMockDAGRecoveryDataForTask.

// ///////////////////////////// Task ////////////////////////////////////////////////////////////
private void initMockDAGRecoveryDataForTask() {
    List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>();
    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, inputGeneratedTezEvents, null);
    Map<String, InputSpecUpdate> rootInputSpecs = new HashMap<String, InputSpecUpdate>();
    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, rootInputSpecs, true);
    VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
    VertexRecoveryData v1RecoveryData = new VertexRecoveryData(v1InitedEvent, v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap<TezTaskID, TaskRecoveryData>(), false);
    DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null);
    DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagId, dagStartedTime, "user", "dagName");
    doReturn(v1RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
    doReturn(dagInitedEvent).when(dagRecoveryData).getDAGInitializedEvent();
    doReturn(dagStartedEvent).when(dagRecoveryData).getDAGStartedEvent();
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) TezTaskID(org.apache.tez.dag.records.TezTaskID) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent)

Example 3 with TaskRecoveryData

use of org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData in project tez by apache.

the class TestDAGRecovery method testTaskRecoverFromStarted.

/**
 * RecoveryEvent: TaskStartedEvent
 * Recover it to Scheduled
 */
@Test(timeout = 5000)
public void testTaskRecoverFromStarted() {
    initMockDAGRecoveryDataForTask();
    TaskStartedEvent taskStartedEvent = new TaskStartedEvent(t1v1Id, "v1", 0L, 0L);
    TaskRecoveryData taskRecoveryData = new TaskRecoveryData(taskStartedEvent, null, null);
    doReturn(taskRecoveryData).when(dagRecoveryData).getTaskRecoveryData(t1v1Id);
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();
    VertexImpl vertex1 = (VertexImpl) dag.getVertex(v1Id);
    TaskImpl task = (TaskImpl) vertex1.getTask(t1v1Id);
    assertEquals(TaskStateInternal.SCHEDULED, task.getInternalState());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 4 with TaskRecoveryData

use of org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData in project tez by apache.

the class TestDAGRecovery method testTaskRecoverFromKilled.

/**
 * RecoveryEvent: TaskFinishedEvent(KILLED)
 * Recover it to KILLED
 */
@Test(timeout = 5000)
public void testTaskRecoverFromKilled() {
    initMockDAGRecoveryDataForTask();
    TaskFinishedEvent taskFinishedEvent = new TaskFinishedEvent(t1v1Id, "v1", 0L, 0L, null, TaskState.KILLED, "", null, 4);
    TaskRecoveryData taskRecoveryData = new TaskRecoveryData(null, taskFinishedEvent, null);
    doReturn(taskRecoveryData).when(dagRecoveryData).getTaskRecoveryData(t1v1Id);
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();
    VertexImpl vertex1 = (VertexImpl) dag.getVertex(v1Id);
    TaskImpl task = (TaskImpl) vertex1.getTask(t1v1Id);
    assertEquals(TaskStateInternal.KILLED, task.getInternalState());
    assertEquals(1, vertex1.getCompletedTasks());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 5 with TaskRecoveryData

use of org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData in project tez by apache.

the class TestRecoveryParser method testRecoveryData.

@Test(timeout = 5000)
public void testRecoveryData() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(dagID);
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appId);
    RecoveryService rService = new RecoveryService(appContext);
    Configuration conf = new Configuration();
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(conf);
    rService.start();
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    // DAG  DAGSubmittedEvent -> DAGInitializedEvent -> DAGStartedEvent
    rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
    DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagID, 100L, "user", "dagName", null);
    DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagID, 0L, "user", "dagName");
    rService.handle(new DAGHistoryEvent(dagID, dagInitedEvent));
    rService.handle(new DAGHistoryEvent(dagID, dagStartedEvent));
    // 3 vertices of this dag: v0, v1, v2
    TezVertexID v0Id = TezVertexID.getInstance(dagID, 0);
    TezVertexID v1Id = TezVertexID.getInstance(dagID, 1);
    TezVertexID v2Id = TezVertexID.getInstance(dagID, 2);
    // v0 VertexInitializedEvent
    VertexInitializedEvent v0InitedEvent = new VertexInitializedEvent(v0Id, "v0", 200L, 400L, 2, null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v0InitedEvent));
    // v1 VertexFinishedEvent(KILLED)
    VertexFinishedEvent v1FinishedEvent = new VertexFinishedEvent(v1Id, "v1", 2, 300L, 400L, 500L, 600L, 700L, VertexState.KILLED, "", null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v1FinishedEvent));
    // v2 VertexInitializedEvent -> VertexStartedEvent
    List<TezEvent> initGeneratedEvents = Lists.newArrayList(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), null));
    VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, "v2", 200L, 300L, 2, null, null, initGeneratedEvents, null);
    VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, 0L);
    rService.handle(new DAGHistoryEvent(dagID, v2InitedEvent));
    rService.handle(new DAGHistoryEvent(dagID, v2StartedEvent));
    // 3 tasks of v2
    TezTaskID t0v2Id = TezTaskID.getInstance(v2Id, 0);
    TezTaskID t1v2Id = TezTaskID.getInstance(v2Id, 1);
    TezTaskID t2v2Id = TezTaskID.getInstance(v2Id, 2);
    // t0v2 TaskStartedEvent
    TaskStartedEvent t0v2StartedEvent = new TaskStartedEvent(t0v2Id, "v2", 400L, 5000L);
    rService.handle(new DAGHistoryEvent(dagID, t0v2StartedEvent));
    // t1v2 TaskFinishedEvent
    TaskFinishedEvent t1v2FinishedEvent = new TaskFinishedEvent(t1v2Id, "v1", 0L, 0L, null, TaskState.KILLED, "", null, 4);
    rService.handle(new DAGHistoryEvent(dagID, t1v2FinishedEvent));
    // t2v2 TaskStartedEvent -> TaskFinishedEvent
    TaskStartedEvent t2v2StartedEvent = new TaskStartedEvent(t2v2Id, "v2", 400L, 500L);
    rService.handle(new DAGHistoryEvent(dagID, t2v2StartedEvent));
    TaskFinishedEvent t2v2FinishedEvent = new TaskFinishedEvent(t2v2Id, "v1", 0L, 0L, null, TaskState.SUCCEEDED, "", null, 4);
    rService.handle(new DAGHistoryEvent(dagID, t2v2FinishedEvent));
    // attempts under t0v2
    ContainerId containerId = ContainerId.newInstance(appAttemptId, 1);
    NodeId nodeId = NodeId.newInstance("localhost", 9999);
    TezTaskAttemptID ta0t0v2Id = TezTaskAttemptID.getInstance(t0v2Id, 0);
    TaskAttemptStartedEvent ta0t0v2StartedEvent = new TaskAttemptStartedEvent(ta0t0v2Id, "v1", 0L, containerId, nodeId, "", "", "");
    rService.handle(new DAGHistoryEvent(dagID, ta0t0v2StartedEvent));
    // attempts under t2v2
    TezTaskAttemptID ta0t2v2Id = TezTaskAttemptID.getInstance(t2v2Id, 0);
    TaskAttemptStartedEvent ta0t2v2StartedEvent = new TaskAttemptStartedEvent(ta0t2v2Id, "v1", 500L, containerId, nodeId, "", "", "");
    rService.handle(new DAGHistoryEvent(dagID, ta0t2v2StartedEvent));
    TaskAttemptFinishedEvent ta0t2v2FinishedEvent = new TaskAttemptFinishedEvent(ta0t2v2Id, "v1", 500L, 600L, TaskAttemptState.SUCCEEDED, null, null, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, ta0t2v2FinishedEvent));
    rService.stop();
    DAGRecoveryData dagData = parser.parseRecoveryData();
    assertFalse(dagData.nonRecoverable);
    // There's no equals method for the history event, so here only verify the init/start/finish time of each event for simplicity
    assertEquals(dagInitedEvent.getInitTime(), dagData.getDAGInitializedEvent().getInitTime());
    assertEquals(dagStartedEvent.getStartTime(), dagData.getDAGStartedEvent().getStartTime());
    assertNull(dagData.getDAGFinishedEvent());
    VertexRecoveryData v0Data = dagData.getVertexRecoveryData(v0Id);
    VertexRecoveryData v1Data = dagData.getVertexRecoveryData(v1Id);
    VertexRecoveryData v2Data = dagData.getVertexRecoveryData(v2Id);
    assertNotNull(v0Data);
    assertNotNull(v1Data);
    assertNotNull(v2Data);
    assertEquals(v0InitedEvent.getInitedTime(), v0Data.getVertexInitedEvent().getInitedTime());
    assertNull(v0Data.getVertexStartedEvent());
    assertNull(v1Data.getVertexInitedEvent());
    assertEquals(v1FinishedEvent.getFinishTime(), v1Data.getVertexFinishedEvent().getFinishTime());
    assertEquals(v2InitedEvent.getInitedTime(), v2Data.getVertexInitedEvent().getInitedTime());
    assertEquals(v2StartedEvent.getStartTime(), v2Data.getVertexStartedEvent().getStartTime());
    TaskRecoveryData t0v2Data = dagData.getTaskRecoveryData(t0v2Id);
    TaskRecoveryData t1v2Data = dagData.getTaskRecoveryData(t1v2Id);
    TaskRecoveryData t2v2Data = dagData.getTaskRecoveryData(t2v2Id);
    assertNotNull(t0v2Data);
    assertNotNull(t1v2Data);
    assertNotNull(t2v2Data);
    assertEquals(t0v2StartedEvent.getStartTime(), t0v2Data.getTaskStartedEvent().getStartTime());
    assertNull(t0v2Data.getTaskFinishedEvent());
    assertEquals(t1v2FinishedEvent.getFinishTime(), t1v2Data.getTaskFinishedEvent().getFinishTime());
    assertNull(t1v2Data.getTaskStartedEvent());
    assertEquals(t2v2StartedEvent.getStartTime(), t2v2Data.getTaskStartedEvent().getStartTime());
    assertEquals(t2v2FinishedEvent.getFinishTime(), t2v2Data.getTaskFinishedEvent().getFinishTime());
    TaskAttemptRecoveryData ta0t0v2Data = dagData.getTaskAttemptRecoveryData(ta0t0v2Id);
    TaskAttemptRecoveryData ta0t2v2Data = dagData.getTaskAttemptRecoveryData(ta0t2v2Id);
    assertNotNull(ta0t0v2Data);
    assertNotNull(ta0t2v2Data);
    assertEquals(ta0t0v2StartedEvent.getStartTime(), ta0t0v2Data.getTaskAttemptStartedEvent().getStartTime());
    assertNull(ta0t0v2Data.getTaskAttemptFinishedEvent());
    assertEquals(ta0t2v2StartedEvent.getStartTime(), ta0t2v2Data.getTaskAttemptStartedEvent().getStartTime());
    assertEquals(ta0t2v2FinishedEvent.getFinishTime(), ta0t2v2Data.getTaskAttemptFinishedEvent().getFinishTime());
}
Also used : RecoveryService(org.apache.tez.dag.history.recovery.RecoveryService) Configuration(org.apache.hadoop.conf.Configuration) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) Path(org.apache.hadoop.fs.Path) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) SystemClock(org.apache.hadoop.yarn.util.SystemClock) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) NodeId(org.apache.hadoop.yarn.api.records.NodeId) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Aggregations

TaskRecoveryData (org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData)9 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)7 ArrayList (java.util.ArrayList)6 VertexRecoveryData (org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData)6 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)6 DAGEventRecoverEvent (org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent)6 VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent)6 TezTaskID (org.apache.tez.dag.records.TezTaskID)6 Test (org.junit.Test)6 TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent)5 VertexConfigurationDoneEvent (org.apache.tez.dag.history.events.VertexConfigurationDoneEvent)5 VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent)5 HashMap (java.util.HashMap)4 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)3 NodeId (org.apache.hadoop.yarn.api.records.NodeId)3 TaskAttemptRecoveryData (org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData)3 DAGInitializedEvent (org.apache.tez.dag.history.events.DAGInitializedEvent)3 DAGStartedEvent (org.apache.tez.dag.history.events.DAGStartedEvent)3 TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)3 TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent)3