Search in sources :

Example 1 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TaskAttemptImpl method logJobHistoryAttemptUnsuccesfulCompletion.

protected void logJobHistoryAttemptUnsuccesfulCompletion(TaskAttemptState state, TaskFailureType taskFailureType) {
    Preconditions.checkArgument(recoveryData == null || recoveryData.getTaskAttemptFinishedEvent() == null, "log TaskAttemptFinishedEvent again in recovery when there's already another TaskAtttemptFinishedEvent");
    if (state == TaskAttemptState.FAILED && taskFailureType == null) {
        throw new IllegalStateException("FAILED state must be accompanied by a FailureType");
    }
    long finishTime = getFinishTime();
    ContainerId unsuccessfulContainerId = null;
    NodeId unsuccessfulContainerNodeId = null;
    String inProgressLogsUrl = null;
    String completedLogsUrl = null;
    if (finishTime <= 0) {
        // comes here in case it was terminated before launch
        finishTime = clock.getTime();
        unsuccessfulContainerId = containerId;
        unsuccessfulContainerNodeId = containerNodeId;
        inProgressLogsUrl = getInProgressLogsUrl();
        completedLogsUrl = getCompletedLogsUrl();
    }
    TaskAttemptFinishedEvent finishEvt = new TaskAttemptFinishedEvent(attemptId, getVertex().getName(), getLaunchTime(), finishTime, state, taskFailureType, terminationCause, StringUtils.join(getDiagnostics(), LINE_SEPARATOR), getCounters(), lastDataEvents, taGeneratedEvents, creationTime, creationCausalTA, allocationTime, unsuccessfulContainerId, unsuccessfulContainerNodeId, inProgressLogsUrl, completedLogsUrl, nodeHttpAddress);
    // FIXME how do we store information regd completion events
    this.appContext.getHistoryHandler().handle(new DAGHistoryEvent(getDAGID(), finishEvt));
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)

Example 2 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestDAGRecovery method testTARecoverFromNewToFailed.

/**
 * RecoveryEvents: TaskAttemptFinishedEvent (FAILED)
 * Recover it to FAILED
 */
@Test(timeout = 5000)
public void testTARecoverFromNewToFailed() {
    initMockDAGRecoveryDataForTaskAttempt();
    TaskAttemptFinishedEvent taFinishedEvent = new TaskAttemptFinishedEvent(ta1t1v1Id, "v1", ta1LaunchTime, ta1FinishedTime, TaskAttemptState.FAILED, TaskFailureType.NON_FATAL, TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
    TaskAttemptRecoveryData taRecoveryData = new TaskAttemptRecoveryData(null, taFinishedEvent);
    doReturn(taRecoveryData).when(dagRecoveryData).getTaskAttemptRecoveryData(ta1t1v1Id);
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();
    TaskImpl task = (TaskImpl) dag.getVertex(v1Id).getTask(t1v1Id);
    TaskAttemptImpl taskAttempt = (TaskAttemptImpl) task.getAttempt(ta1t1v1Id);
    assertEquals(TaskAttemptStateInternal.FAILED, taskAttempt.getInternalState());
    assertEquals(TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED, taskAttempt.getTerminationCause());
    historyEventHandler.verifyHistoryEvent(0, HistoryEventType.TASK_ATTEMPT_STARTED);
    historyEventHandler.verifyHistoryEvent(0, HistoryEventType.TASK_ATTEMPT_FINISHED);
    assertEquals(1, task.failedAttempts);
    // new task attempt is scheduled
    assertEquals(2, task.getAttempts().size());
    assertEquals(ta1FinishedTime, taskAttempt.getFinishTime());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 3 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestDAGRecovery method testTARecoverFromSucceeded.

/**
 * RecoveryEvents: TaskAttemptStartedEvent -> TaskAttemptFinishedEvent (SUCCEEDED)
 * Recover it to SUCCEEDED
 */
@Test(timeout = 5000)
public void testTARecoverFromSucceeded() {
    initMockDAGRecoveryDataForTaskAttempt();
    TaskAttemptStartedEvent taStartedEvent = new TaskAttemptStartedEvent(ta1t1v1Id, "v1", ta1LaunchTime, mock(ContainerId.class), mock(NodeId.class), "", "", "");
    List<TezEvent> taGeneratedEvents = new ArrayList<TezEvent>();
    EventMetaData sourceInfo = new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex3", ta1t1v1Id);
    taGeneratedEvents.add(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), sourceInfo));
    TaskAttemptFinishedEvent taFinishedEvent = new TaskAttemptFinishedEvent(ta1t1v1Id, "v1", ta1LaunchTime, ta1FinishedTime, TaskAttemptState.SUCCEEDED, null, null, "", null, null, taGeneratedEvents, 0L, null, 0L, null, null, null, null, null);
    TaskAttemptRecoveryData taRecoveryData = new TaskAttemptRecoveryData(taStartedEvent, taFinishedEvent);
    doReturn(taRecoveryData).when(dagRecoveryData).getTaskAttemptRecoveryData(ta1t1v1Id);
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();
    TaskImpl task = (TaskImpl) dag.getVertex(v1Id).getTask(t1v1Id);
    TaskAttemptImpl taskAttempt = (TaskAttemptImpl) task.getAttempt(ta1t1v1Id);
    assertEquals(TaskAttemptStateInternal.SUCCEEDED, taskAttempt.getInternalState());
    historyEventHandler.verifyHistoryEvent(0, HistoryEventType.TASK_ATTEMPT_FINISHED);
    assertEquals(TaskStateInternal.SUCCEEDED, task.getInternalState());
    assertEquals(ta1LaunchTime, taskAttempt.getLaunchTime());
    assertEquals(ta1FinishedTime, taskAttempt.getFinishTime());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ArrayList(java.util.ArrayList) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 4 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestTaskAttempt method testMultipleOutputFailed.

@Test(timeout = 5000)
public // TaskAttempt.
void testMultipleOutputFailed() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
    MockEventHandler mockEh = new MockEventHandler();
    MockEventHandler eventHandler = spy(mockEh);
    TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener();
    Configuration taskConf = new Configuration();
    taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    taskConf.setBoolean("fs.file.impl.disable.cache", true);
    locationHint = TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(new String[] { "127.0.0.1" })), null);
    Resource resource = Resource.newInstance(1024, 1);
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    @SuppressWarnings("deprecation") ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appCtx);
    containers.addContainerIfNew(container, 0, 0, 0);
    doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
    doReturn(containers).when(appCtx).getAllContainers();
    HistoryEventHandler mockHistHandler = mock(HistoryEventHandler.class);
    doReturn(mockHistHandler).when(appCtx).getHistoryHandler();
    TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class);
    MockTaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false);
    TezTaskAttemptID taskAttemptID = taImpl.getID();
    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
    taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId));
    taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID));
    verify(mockHeartbeatHandler).register(taskAttemptID);
    taImpl.handle(new TaskAttemptEvent(taskAttemptID, TaskAttemptEventType.TA_DONE));
    assertEquals("Task attempt is not in succeeded state", taImpl.getState(), TaskAttemptState.SUCCEEDED);
    verify(mockHeartbeatHandler).unregister(taskAttemptID);
    int expectedEventsTillSucceeded = 8;
    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
    ArgumentCaptor<DAGHistoryEvent> histArg = ArgumentCaptor.forClass(DAGHistoryEvent.class);
    verify(eventHandler, times(expectedEventsTillSucceeded)).handle(arg.capture());
    // start and finish
    verify(mockHistHandler, times(2)).handle(histArg.capture());
    DAGHistoryEvent histEvent = histArg.getValue();
    TaskAttemptFinishedEvent finishEvent = (TaskAttemptFinishedEvent) histEvent.getHistoryEvent();
    long finishTime = finishEvent.getFinishTime();
    verifyEventType(arg.getAllValues(), TaskEventTAUpdate.class, 2);
    InputReadErrorEvent mockReEvent = InputReadErrorEvent.create("", 0, 1);
    EventMetaData mockMeta = mock(EventMetaData.class);
    TezTaskAttemptID mockDestId1 = mock(TezTaskAttemptID.class);
    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1);
    TezEvent tzEvent = new TezEvent(mockReEvent, mockMeta);
    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11));
    // failure threshold not met. state is SUCCEEDED
    assertEquals("Task attempt is not in succeeded state", taImpl.getState(), TaskAttemptState.SUCCEEDED);
    // sending same error again doesnt change anything
    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11));
    assertEquals("Task attempt is not in succeeded state", taImpl.getState(), TaskAttemptState.SUCCEEDED);
    // default value of error cause
    assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, taImpl.getTerminationCause());
    // different destination attempt reports error. now threshold crossed
    TezTaskAttemptID mockDestId2 = mock(TezTaskAttemptID.class);
    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId2);
    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11));
    assertEquals("Task attempt is not in FAILED state", taImpl.getState(), TaskAttemptState.FAILED);
    assertEquals(TaskAttemptTerminationCause.OUTPUT_LOST, taImpl.getTerminationCause());
    // verify unregister is not invoked again
    verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID);
    verify(mockHistHandler, times(3)).handle(histArg.capture());
    histEvent = histArg.getValue();
    finishEvent = (TaskAttemptFinishedEvent) histEvent.getHistoryEvent();
    assertEquals(TaskFailureType.NON_FATAL, finishEvent.getTaskFailureType());
    long newFinishTime = finishEvent.getFinishTime();
    Assert.assertEquals(finishTime, newFinishTime);
    assertEquals(true, taImpl.inputFailedReported);
    int expectedEventsAfterFetchFailure = expectedEventsTillSucceeded + 2;
    arg.getAllValues().clear();
    verify(eventHandler, times(expectedEventsAfterFetchFailure)).handle(arg.capture());
    Event e = verifyEventType(arg.getAllValues().subList(expectedEventsTillSucceeded, expectedEventsAfterFetchFailure), TaskEventTAFailed.class, 1);
    TaskEventTAFailed failedEvent = (TaskEventTAFailed) e;
    assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType());
    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 1));
    assertEquals("Task attempt is not in FAILED state, still", taImpl.getState(), TaskAttemptState.FAILED);
    assertFalse("InternalError occurred trying to handle TA_TOO_MANY_FETCH_FAILURES", eventHandler.internalError);
    // No new events.
    verify(eventHandler, times(expectedEventsAfterFetchFailure)).handle(arg.capture());
    taskConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, 1);
    TezTaskID taskID2 = TezTaskID.getInstance(vertexID, 2);
    MockTaskAttemptImpl taImpl2 = new MockTaskAttemptImpl(taskID2, 1, eventHandler, taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false);
    TezTaskAttemptID taskAttemptID2 = taImpl2.getID();
    taImpl2.handle(new TaskAttemptEventSchedule(taskAttemptID2, 0, 0));
    taImpl2.handle(new TaskAttemptEventSubmitted(taskAttemptID2, contId));
    taImpl2.handle(new TaskAttemptEventStartedRemotely(taskAttemptID2));
    verify(mockHeartbeatHandler).register(taskAttemptID2);
    taImpl2.handle(new TaskAttemptEvent(taskAttemptID2, TaskAttemptEventType.TA_DONE));
    assertEquals("Task attempt is not in succeeded state", taImpl2.getState(), TaskAttemptState.SUCCEEDED);
    verify(mockHeartbeatHandler).unregister(taskAttemptID2);
    mockReEvent = InputReadErrorEvent.create("", 1, 1);
    mockMeta = mock(EventMetaData.class);
    mockDestId1 = mock(TezTaskAttemptID.class);
    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1);
    tzEvent = new TezEvent(mockReEvent, mockMeta);
    // This should fail even when MAX_ALLOWED_OUTPUT_FAILURES_FRACTION is within limits, as
    // MAX_ALLOWED_OUTPUT_FAILURES has crossed the limit.
    taImpl2.handle(new TaskAttemptEventOutputFailed(taskAttemptID2, tzEvent, 8));
    assertEquals("Task attempt is not in failed state", taImpl2.getState(), TaskAttemptState.FAILED);
    assertEquals(TaskAttemptTerminationCause.OUTPUT_LOST, taImpl2.getTerminationCause());
    // verify unregister is not invoked again
    verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID2);
    Clock mockClock = mock(Clock.class);
    int readErrorTimespanSec = 1;
    taskConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, 10);
    taskConf.setInt(TezConfiguration.TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC, readErrorTimespanSec);
    TezTaskID taskID3 = TezTaskID.getInstance(vertexID, 3);
    MockTaskAttemptImpl taImpl3 = new MockTaskAttemptImpl(taskID3, 1, eventHandler, taListener, taskConf, mockClock, mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false);
    TezTaskAttemptID taskAttemptID3 = taImpl3.getID();
    taImpl3.handle(new TaskAttemptEventSchedule(taskAttemptID3, 0, 0));
    taImpl3.handle(new TaskAttemptEventSubmitted(taskAttemptID3, contId));
    taImpl3.handle(new TaskAttemptEventStartedRemotely(taskAttemptID3));
    verify(mockHeartbeatHandler).register(taskAttemptID3);
    taImpl3.handle(new TaskAttemptEvent(taskAttemptID3, TaskAttemptEventType.TA_DONE));
    assertEquals("Task attempt is not in succeeded state", taImpl3.getState(), TaskAttemptState.SUCCEEDED);
    verify(mockHeartbeatHandler).unregister(taskAttemptID3);
    mockReEvent = InputReadErrorEvent.create("", 1, 1);
    mockMeta = mock(EventMetaData.class);
    mockDestId1 = mock(TezTaskAttemptID.class);
    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1);
    tzEvent = new TezEvent(mockReEvent, mockMeta);
    when(mockClock.getTime()).thenReturn(1000L);
    // time deadline not exceeded for a couple of read error events
    taImpl3.handle(new TaskAttemptEventOutputFailed(taskAttemptID3, tzEvent, 1000));
    assertEquals("Task attempt is not in succeeded state", taImpl3.getState(), TaskAttemptState.SUCCEEDED);
    when(mockClock.getTime()).thenReturn(1500L);
    taImpl3.handle(new TaskAttemptEventOutputFailed(taskAttemptID3, tzEvent, 1000));
    assertEquals("Task attempt is not in succeeded state", taImpl3.getState(), TaskAttemptState.SUCCEEDED);
    // exceed the time threshold
    when(mockClock.getTime()).thenReturn(2001L);
    taImpl3.handle(new TaskAttemptEventOutputFailed(taskAttemptID3, tzEvent, 1000));
    assertEquals("Task attempt is not in FAILED state", taImpl3.getState(), TaskAttemptState.FAILED);
    assertEquals(TaskAttemptTerminationCause.OUTPUT_LOST, taImpl3.getTerminationCause());
    // verify unregister is not invoked again
    verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID3);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) Clock(org.apache.hadoop.yarn.util.Clock) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Container(org.apache.hadoop.yarn.api.records.Container) HistoryEventHandler(org.apache.tez.dag.history.HistoryEventHandler) TaskAttemptEventOutputFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) TaskHeartbeatHandler(org.apache.tez.dag.app.TaskHeartbeatHandler) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) HashSet(java.util.HashSet) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TaskEventTAFailed(org.apache.tez.dag.app.dag.event.TaskEventTAFailed) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TaskAttemptEvent(org.apache.tez.dag.app.dag.event.TaskAttemptEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TaskEvent(org.apache.tez.dag.app.dag.event.TaskEvent) DAGEvent(org.apache.tez.dag.app.dag.event.DAGEvent) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) TaskAttemptEvent(org.apache.tez.dag.app.dag.event.TaskAttemptEvent) Event(org.apache.hadoop.yarn.event.Event) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 5 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestRecoveryParser method testRecoveryData.

@Test(timeout = 5000)
public void testRecoveryData() throws IOException {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    AppContext appContext = mock(AppContext.class);
    when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
    when(appContext.getClock()).thenReturn(new SystemClock());
    when(mockDAGImpl.getID()).thenReturn(dagID);
    when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
    when(appContext.getApplicationID()).thenReturn(appId);
    RecoveryService rService = new RecoveryService(appContext);
    Configuration conf = new Configuration();
    conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
    rService.init(conf);
    rService.start();
    DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
    // DAG  DAGSubmittedEvent -> DAGInitializedEvent -> DAGStartedEvent
    rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
    DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagID, 100L, "user", "dagName", null);
    DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagID, 0L, "user", "dagName");
    rService.handle(new DAGHistoryEvent(dagID, dagInitedEvent));
    rService.handle(new DAGHistoryEvent(dagID, dagStartedEvent));
    // 3 vertices of this dag: v0, v1, v2
    TezVertexID v0Id = TezVertexID.getInstance(dagID, 0);
    TezVertexID v1Id = TezVertexID.getInstance(dagID, 1);
    TezVertexID v2Id = TezVertexID.getInstance(dagID, 2);
    // v0 VertexInitializedEvent
    VertexInitializedEvent v0InitedEvent = new VertexInitializedEvent(v0Id, "v0", 200L, 400L, 2, null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v0InitedEvent));
    // v1 VertexFinishedEvent(KILLED)
    VertexFinishedEvent v1FinishedEvent = new VertexFinishedEvent(v1Id, "v1", 2, 300L, 400L, 500L, 600L, 700L, VertexState.KILLED, "", null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, v1FinishedEvent));
    // v2 VertexInitializedEvent -> VertexStartedEvent
    List<TezEvent> initGeneratedEvents = Lists.newArrayList(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), null));
    VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, "v2", 200L, 300L, 2, null, null, initGeneratedEvents, null);
    VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, 0L);
    rService.handle(new DAGHistoryEvent(dagID, v2InitedEvent));
    rService.handle(new DAGHistoryEvent(dagID, v2StartedEvent));
    // 3 tasks of v2
    TezTaskID t0v2Id = TezTaskID.getInstance(v2Id, 0);
    TezTaskID t1v2Id = TezTaskID.getInstance(v2Id, 1);
    TezTaskID t2v2Id = TezTaskID.getInstance(v2Id, 2);
    // t0v2 TaskStartedEvent
    TaskStartedEvent t0v2StartedEvent = new TaskStartedEvent(t0v2Id, "v2", 400L, 5000L);
    rService.handle(new DAGHistoryEvent(dagID, t0v2StartedEvent));
    // t1v2 TaskFinishedEvent
    TaskFinishedEvent t1v2FinishedEvent = new TaskFinishedEvent(t1v2Id, "v1", 0L, 0L, null, TaskState.KILLED, "", null, 4);
    rService.handle(new DAGHistoryEvent(dagID, t1v2FinishedEvent));
    // t2v2 TaskStartedEvent -> TaskFinishedEvent
    TaskStartedEvent t2v2StartedEvent = new TaskStartedEvent(t2v2Id, "v2", 400L, 500L);
    rService.handle(new DAGHistoryEvent(dagID, t2v2StartedEvent));
    TaskFinishedEvent t2v2FinishedEvent = new TaskFinishedEvent(t2v2Id, "v1", 0L, 0L, null, TaskState.SUCCEEDED, "", null, 4);
    rService.handle(new DAGHistoryEvent(dagID, t2v2FinishedEvent));
    // attempts under t0v2
    ContainerId containerId = ContainerId.newInstance(appAttemptId, 1);
    NodeId nodeId = NodeId.newInstance("localhost", 9999);
    TezTaskAttemptID ta0t0v2Id = TezTaskAttemptID.getInstance(t0v2Id, 0);
    TaskAttemptStartedEvent ta0t0v2StartedEvent = new TaskAttemptStartedEvent(ta0t0v2Id, "v1", 0L, containerId, nodeId, "", "", "");
    rService.handle(new DAGHistoryEvent(dagID, ta0t0v2StartedEvent));
    // attempts under t2v2
    TezTaskAttemptID ta0t2v2Id = TezTaskAttemptID.getInstance(t2v2Id, 0);
    TaskAttemptStartedEvent ta0t2v2StartedEvent = new TaskAttemptStartedEvent(ta0t2v2Id, "v1", 500L, containerId, nodeId, "", "", "");
    rService.handle(new DAGHistoryEvent(dagID, ta0t2v2StartedEvent));
    TaskAttemptFinishedEvent ta0t2v2FinishedEvent = new TaskAttemptFinishedEvent(ta0t2v2Id, "v1", 500L, 600L, TaskAttemptState.SUCCEEDED, null, null, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
    rService.handle(new DAGHistoryEvent(dagID, ta0t2v2FinishedEvent));
    rService.stop();
    DAGRecoveryData dagData = parser.parseRecoveryData();
    assertFalse(dagData.nonRecoverable);
    // There's no equals method for the history event, so here only verify the init/start/finish time of each event for simplicity
    assertEquals(dagInitedEvent.getInitTime(), dagData.getDAGInitializedEvent().getInitTime());
    assertEquals(dagStartedEvent.getStartTime(), dagData.getDAGStartedEvent().getStartTime());
    assertNull(dagData.getDAGFinishedEvent());
    VertexRecoveryData v0Data = dagData.getVertexRecoveryData(v0Id);
    VertexRecoveryData v1Data = dagData.getVertexRecoveryData(v1Id);
    VertexRecoveryData v2Data = dagData.getVertexRecoveryData(v2Id);
    assertNotNull(v0Data);
    assertNotNull(v1Data);
    assertNotNull(v2Data);
    assertEquals(v0InitedEvent.getInitedTime(), v0Data.getVertexInitedEvent().getInitedTime());
    assertNull(v0Data.getVertexStartedEvent());
    assertNull(v1Data.getVertexInitedEvent());
    assertEquals(v1FinishedEvent.getFinishTime(), v1Data.getVertexFinishedEvent().getFinishTime());
    assertEquals(v2InitedEvent.getInitedTime(), v2Data.getVertexInitedEvent().getInitedTime());
    assertEquals(v2StartedEvent.getStartTime(), v2Data.getVertexStartedEvent().getStartTime());
    TaskRecoveryData t0v2Data = dagData.getTaskRecoveryData(t0v2Id);
    TaskRecoveryData t1v2Data = dagData.getTaskRecoveryData(t1v2Id);
    TaskRecoveryData t2v2Data = dagData.getTaskRecoveryData(t2v2Id);
    assertNotNull(t0v2Data);
    assertNotNull(t1v2Data);
    assertNotNull(t2v2Data);
    assertEquals(t0v2StartedEvent.getStartTime(), t0v2Data.getTaskStartedEvent().getStartTime());
    assertNull(t0v2Data.getTaskFinishedEvent());
    assertEquals(t1v2FinishedEvent.getFinishTime(), t1v2Data.getTaskFinishedEvent().getFinishTime());
    assertNull(t1v2Data.getTaskStartedEvent());
    assertEquals(t2v2StartedEvent.getStartTime(), t2v2Data.getTaskStartedEvent().getStartTime());
    assertEquals(t2v2FinishedEvent.getFinishTime(), t2v2Data.getTaskFinishedEvent().getFinishTime());
    TaskAttemptRecoveryData ta0t0v2Data = dagData.getTaskAttemptRecoveryData(ta0t0v2Id);
    TaskAttemptRecoveryData ta0t2v2Data = dagData.getTaskAttemptRecoveryData(ta0t2v2Id);
    assertNotNull(ta0t0v2Data);
    assertNotNull(ta0t2v2Data);
    assertEquals(ta0t0v2StartedEvent.getStartTime(), ta0t0v2Data.getTaskAttemptStartedEvent().getStartTime());
    assertNull(ta0t0v2Data.getTaskAttemptFinishedEvent());
    assertEquals(ta0t2v2StartedEvent.getStartTime(), ta0t2v2Data.getTaskAttemptStartedEvent().getStartTime());
    assertEquals(ta0t2v2FinishedEvent.getFinishTime(), ta0t2v2Data.getTaskAttemptFinishedEvent().getFinishTime());
}
Also used : RecoveryService(org.apache.tez.dag.history.recovery.RecoveryService) Configuration(org.apache.hadoop.conf.Configuration) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) Path(org.apache.hadoop.fs.Path) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) SystemClock(org.apache.hadoop.yarn.util.SystemClock) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) NodeId(org.apache.hadoop.yarn.api.records.NodeId) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Aggregations

TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)18 TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent)11 Test (org.junit.Test)11 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)8 TaskAttemptRecoveryData (org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData)8 TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent)8 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)7 DAGEventRecoverEvent (org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent)7 VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent)7 DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent)6 TaskFinishedEvent (org.apache.tez.dag.history.events.TaskFinishedEvent)6 VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent)6 VertexFinishedEvent (org.apache.tez.dag.history.events.VertexFinishedEvent)5 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 AMStartedEvent (org.apache.tez.dag.history.events.AMStartedEvent)4 ContainerLaunchedEvent (org.apache.tez.dag.history.events.ContainerLaunchedEvent)4