Search in sources :

Example 6 with TaskAttemptEventAttemptFailed

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed in project tez by apache.

the class TestTaskImpl method testFailedTaskTransitionWithLaunchedAttempt.

@Test(timeout = 30000)
public void testFailedTaskTransitionWithLaunchedAttempt() throws InterruptedException {
    Configuration newConf = new Configuration(conf);
    newConf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
    Vertex vertex = mock(Vertex.class);
    doReturn(new VertexImpl.VertexConfigImpl(newConf)).when(vertex).getVertexConfig();
    mockTask = new MockTaskImpl(vertexId, partition, eventHandler, conf, taskCommunicatorManagerInterface, clock, taskHeartbeatHandler, appContext, leafVertex, taskResource, containerContext, vertex);
    TezTaskID taskId = getNewTaskID();
    scheduleTaskAttempt(taskId);
    MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getLastAttempt();
    launchTaskAttempt(firstMockTaskAttempt.getID());
    mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID()));
    MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getLastAttempt();
    launchTaskAttempt(secondMockTaskAttempt.getID());
    firstMockTaskAttempt.handle(new TaskAttemptEventSchedule(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10));
    secondMockTaskAttempt.handle(new TaskAttemptEventSchedule(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), 10, 10));
    firstMockTaskAttempt.handle(new TaskAttemptEventSubmitted(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), mockContainer.getId()));
    secondMockTaskAttempt.handle(new TaskAttemptEventSubmitted(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), mockContainer.getId()));
    secondMockTaskAttempt.handle(new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString())));
    firstMockTaskAttempt.handle(new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString())));
    secondMockTaskAttempt.handle(new TaskAttemptEventAttemptFailed(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "test", TaskAttemptTerminationCause.NO_PROGRESS));
    firstMockTaskAttempt.handle(new TaskAttemptEventAttemptFailed(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "test", TaskAttemptTerminationCause.NO_PROGRESS));
    firstMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, firstMockTaskAttempt.getID(), "test", TaskAttemptTerminationCause.NO_PROGRESS));
    secondMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, secondMockTaskAttempt.getID(), "test", TaskAttemptTerminationCause.NO_PROGRESS));
    mockTask.handle(new TaskEventTAFailed(secondMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class)));
    mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class)));
    assertTrue("Attempts should have failed!", firstMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.FAILED && secondMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.FAILED);
    assertEquals("Task should have no uncompleted attempts!", 0, mockTask.getUncompletedAttemptsCount());
    assertTrue("Task should have failed!", mockTask.getState() == TaskState.FAILED);
    mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID()));
    MockTaskAttemptImpl thirdMockTaskAttempt = mockTask.getLastAttempt();
    mockTask.handle(createTaskTALauncherEvent(thirdMockTaskAttempt.getID()));
}
Also used : Vertex(org.apache.tez.dag.app.dag.Vertex) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TaskEventTAFailed(org.apache.tez.dag.app.dag.event.TaskEventTAFailed) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskAttemptEventContainerTerminated(org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) Test(org.junit.Test)

Example 7 with TaskAttemptEventAttemptFailed

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed in project tez by apache.

the class TestTaskCommunicatorManager2 method testTaskAttemptFailedKilled.

@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testTaskAttemptFailedKilled() throws IOException, TezException {
    TaskCommunicatorManagerWrapperForTest wrapper = new TaskCommunicatorManagerWrapperForTest();
    TaskSpec taskSpec1 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);
    TaskSpec taskSpec2 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);
    ContainerId containerId1 = wrapper.createContainerId(1);
    wrapper.registerRunningContainer(containerId1);
    wrapper.registerTaskAttempt(containerId1, amContainerTask1);
    ContainerId containerId2 = wrapper.createContainerId(2);
    wrapper.registerRunningContainer(containerId2);
    wrapper.registerTaskAttempt(containerId2, amContainerTask2);
    wrapper.getTaskCommunicatorManager().taskFailed(amContainerTask1.getTask().getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.COMMUNICATION_ERROR, "Diagnostics1");
    wrapper.getTaskCommunicatorManager().taskKilled(amContainerTask2.getTask().getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Diagnostics2");
    ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(wrapper.getEventHandler(), times(2)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    assertTrue(argumentCaptor.getAllValues().get(1) instanceof TaskAttemptEventAttemptKilled);
    TaskAttemptEventAttemptFailed failedEvent = (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    TaskAttemptEventAttemptKilled killedEvent = (TaskAttemptEventAttemptKilled) argumentCaptor.getAllValues().get(1);
    assertEquals("Diagnostics1", failedEvent.getDiagnosticInfo());
    assertEquals(TaskAttemptTerminationCause.COMMUNICATION_ERROR, failedEvent.getTerminationCause());
    assertEquals("Diagnostics2", killedEvent.getDiagnosticInfo());
    assertEquals(TaskAttemptTerminationCause.SERVICE_BUSY, killedEvent.getTerminationCause());
// TODO TEZ-2003. Verify unregistration from the registered list
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) Event(org.apache.hadoop.yarn.event.Event) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventAttemptKilled(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled) AMContainerTask(org.apache.tez.dag.app.rm.container.AMContainerTask) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) Test(org.junit.Test)

Example 8 with TaskAttemptEventAttemptFailed

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed in project tez by apache.

the class TestTaskCommunicatorManager2 method testTaskAttemptFailureViaHeartbeat.

// Tests fatal and non fatal
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testTaskAttemptFailureViaHeartbeat() throws IOException, TezException {
    TaskCommunicatorManagerWrapperForTest wrapper = new TaskCommunicatorManagerWrapperForTest();
    TaskSpec taskSpec1 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);
    TaskSpec taskSpec2 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);
    ContainerId containerId1 = wrapper.createContainerId(1);
    wrapper.registerRunningContainer(containerId1);
    wrapper.registerTaskAttempt(containerId1, amContainerTask1);
    ContainerId containerId2 = wrapper.createContainerId(2);
    wrapper.registerRunningContainer(containerId2);
    wrapper.registerTaskAttempt(containerId2, amContainerTask2);
    List<TezEvent> events = new LinkedList<>();
    EventMetaData sourceInfo1 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, "testVertex", null, taskSpec1.getTaskAttemptID());
    TaskAttemptFailedEvent failedEvent1 = new TaskAttemptFailedEvent("non-fatal test error", TaskFailureType.NON_FATAL);
    TezEvent failedEventT1 = new TezEvent(failedEvent1, sourceInfo1);
    events.add(failedEventT1);
    TaskHeartbeatRequest taskHeartbeatRequest1 = new TaskHeartbeatRequest(containerId1.toString(), taskSpec1.getTaskAttemptID(), events, 0, 0, 0);
    wrapper.getTaskCommunicatorManager().heartbeat(taskHeartbeatRequest1);
    ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(wrapper.getEventHandler(), times(1)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    TaskAttemptEventAttemptFailed failedEvent = (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType());
    assertTrue(failedEvent.getDiagnosticInfo().contains("non-fatal"));
    events.clear();
    reset(wrapper.getEventHandler());
    EventMetaData sourceInfo2 = new EventMetaData(EventMetaData.EventProducerConsumerType.PROCESSOR, "testVertex", null, taskSpec2.getTaskAttemptID());
    TaskAttemptFailedEvent failedEvent2 = new TaskAttemptFailedEvent("-fatal- test error", TaskFailureType.FATAL);
    TezEvent failedEventT2 = new TezEvent(failedEvent2, sourceInfo2);
    events.add(failedEventT2);
    TaskHeartbeatRequest taskHeartbeatRequest2 = new TaskHeartbeatRequest(containerId2.toString(), taskSpec2.getTaskAttemptID(), events, 0, 0, 0);
    wrapper.getTaskCommunicatorManager().heartbeat(taskHeartbeatRequest2);
    argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(wrapper.getEventHandler(), times(1)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    failedEvent = (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    assertEquals(TaskFailureType.FATAL, failedEvent.getTaskFailureType());
    assertTrue(failedEvent.getDiagnosticInfo().contains("-fatal-"));
}
Also used : TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) LinkedList(java.util.LinkedList) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskHeartbeatRequest(org.apache.tez.serviceplugins.api.TaskHeartbeatRequest) Event(org.apache.hadoop.yarn.event.Event) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) AMContainerTask(org.apache.tez.dag.app.rm.container.AMContainerTask) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) Test(org.junit.Test)

Example 9 with TaskAttemptEventAttemptFailed

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed in project tez by apache.

the class TestTaskCommunicatorManager2 method testTaskAttemptFailureViaContext.

// Tests fatal and non fatal
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testTaskAttemptFailureViaContext() throws IOException, TezException {
    TaskCommunicatorManagerWrapperForTest wrapper = new TaskCommunicatorManagerWrapperForTest();
    TaskSpec taskSpec1 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask1 = new AMContainerTask(taskSpec1, null, null, false, 10);
    TaskSpec taskSpec2 = wrapper.createTaskSpec();
    AMContainerTask amContainerTask2 = new AMContainerTask(taskSpec2, null, null, false, 10);
    ContainerId containerId1 = wrapper.createContainerId(1);
    wrapper.registerRunningContainer(containerId1);
    wrapper.registerTaskAttempt(containerId1, amContainerTask1);
    ContainerId containerId2 = wrapper.createContainerId(2);
    wrapper.registerRunningContainer(containerId2);
    wrapper.registerTaskAttempt(containerId2, amContainerTask2);
    // non-fatal
    wrapper.getTaskCommunicatorManager().taskFailed(taskSpec1.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.CONTAINER_EXITED, "--non-fatal--");
    ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(wrapper.getEventHandler(), times(1)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    TaskAttemptEventAttemptFailed failedEvent = (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType());
    assertTrue(failedEvent.getDiagnosticInfo().contains("--non-fatal--"));
    reset(wrapper.getEventHandler());
    // fatal
    wrapper.getTaskCommunicatorManager().taskFailed(taskSpec2.getTaskAttemptID(), TaskFailureType.FATAL, TaskAttemptEndReason.OTHER, "--fatal--");
    argumentCaptor = ArgumentCaptor.forClass(Event.class);
    verify(wrapper.getEventHandler(), times(1)).handle(argumentCaptor.capture());
    assertTrue(argumentCaptor.getAllValues().get(0) instanceof TaskAttemptEventAttemptFailed);
    failedEvent = (TaskAttemptEventAttemptFailed) argumentCaptor.getAllValues().get(0);
    assertEquals(TaskFailureType.FATAL, failedEvent.getTaskFailureType());
    assertTrue(failedEvent.getDiagnosticInfo().contains("--fatal--"));
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) Event(org.apache.hadoop.yarn.event.Event) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) AMContainerTask(org.apache.tez.dag.app.rm.container.AMContainerTask) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) Test(org.junit.Test)

Example 10 with TaskAttemptEventAttemptFailed

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed in project tez by apache.

the class TestVertexImpl method testVertexTaskAttemptProcessorFailure.

@SuppressWarnings("deprecation")
@Test(timeout = 5000)
public void testVertexTaskAttemptProcessorFailure() throws Exception {
    initAllVertices(VertexState.INITED);
    VertexImpl v = vertices.get("vertex1");
    startVertex(v);
    dispatcher.await();
    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
    containers.addContainerIfNew(container, 0, 0, 0);
    doReturn(containers).when(appContext).getAllContainers();
    ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId));
    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID()));
    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
    ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.APPLICATION_ERROR));
    dispatcher.await();
    Assert.assertEquals(VertexState.RUNNING, v.getState());
    Assert.assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, ta.getTerminationCause());
}
Also used : AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Aggregations

TaskAttemptEventAttemptFailed (org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed)14 Test (org.junit.Test)13 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)11 TaskAttemptEventSchedule (org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule)10 TaskAttemptEventStartedRemotely (org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely)10 TaskAttemptEventSubmitted (org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted)10 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)9 Configuration (org.apache.hadoop.conf.Configuration)7 Container (org.apache.hadoop.yarn.api.records.Container)7 NodeId (org.apache.hadoop.yarn.api.records.NodeId)7 Event (org.apache.hadoop.yarn.event.Event)7 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)7 ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler)7 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)7 AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap)7 ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher)7 TezTaskID (org.apache.tez.dag.records.TezTaskID)7 TaskAttemptEvent (org.apache.tez.dag.app.dag.event.TaskAttemptEvent)6 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)6 TaskAttemptEventContainerTerminated (org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated)5