Search in sources :

Example 16 with ContainerContextMatcher

use of org.apache.tez.dag.app.rm.container.ContainerContextMatcher in project tez by apache.

the class TestContainerReuse method testAssignmentOnShutdown.

@Test(timeout = 10000l)
public void testAssignmentOnShutdown() throws IOException, InterruptedException, ExecutionException {
    LOG.info("Test testAssignmentOnShutdown");
    Configuration tezConf = new Configuration(new YarnConfiguration());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false);
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
    CapturingEventHandler eventHandler = new CapturingEventHandler();
    TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
    AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
    TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
    AppContext appContext = mock(AppContext.class);
    doReturn(new Configuration(false)).when(appContext).getAMConf();
    AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
    AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
    doReturn(amContainerMap).when(appContext).getAllContainers();
    doReturn(amNodeTracker).when(appContext).getNodeTracker();
    doReturn(DAGAppMasterState.SUCCEEDED).when(appContext).getAMState();
    doReturn(true).when(appContext).isAMInCompletionState();
    doReturn(dagID).when(appContext).getCurrentDAGID();
    doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
    TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
    TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
    taskSchedulerManager.init(tezConf);
    taskSchedulerManager.start();
    TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
    TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
    AtomicBoolean drainNotifier = new AtomicBoolean(false);
    taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
    Resource resource1 = Resource.newInstance(1024, 1);
    String[] host1 = { "host1" };
    String[] racks = { "/default-rack" };
    Priority priority1 = Priority.newInstance(1);
    TezVertexID vertexID1 = TezVertexID.getInstance(dagID, 1);
    // Vertex 1, Task 1, Attempt 1, host1
    TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 1), 1);
    TaskAttempt ta11 = mock(TaskAttempt.class);
    AMSchedulerEventTALaunchRequest lrEvent1 = createLaunchRequestEvent(taID11, ta11, resource1, host1, racks, priority1);
    taskSchedulerManager.handleEvent(lrEvent1);
    Container container1 = createContainer(1, "host1", resource1, priority1);
    // One container allocated.
    taskScheduler.onContainersAllocated(Collections.singletonList(container1));
    drainableAppCallback.drain();
    verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
    taskScheduler.shutdown();
    taskSchedulerManager.close();
}
Also used : CapturingEventHandler(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.CapturingEventHandler) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) TaskSchedulerWithDrainableContext(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerWithDrainableContext) AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) AlwaysMatchesContainerMatcher(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AlwaysMatchesContainerMatcher) Container(org.apache.hadoop.yarn.api.records.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) TezDAGID(org.apache.tez.dag.records.TezDAGID) AMRMClientAsyncForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) TaskSchedulerContextDrainable(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerContextDrainable) TezVertexID(org.apache.tez.dag.records.TezVertexID) Priority(org.apache.hadoop.yarn.api.records.Priority) AppContext(org.apache.tez.dag.app.AppContext) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) AMNodeTracker(org.apache.tez.dag.app.rm.node.AMNodeTracker) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) TaskSchedulerManagerForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerManagerForTest) CookieContainerRequest(org.apache.tez.dag.app.rm.YarnTaskSchedulerService.CookieContainerRequest) AMRMClientForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientForTest) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) TaskSchedulerManagerForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerManagerForTest) AMRMClientForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientForTest) AMRMClientAsyncForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest) Test(org.junit.Test)

Example 17 with ContainerContextMatcher

use of org.apache.tez.dag.app.rm.container.ContainerContextMatcher in project tez by apache.

the class TestContainerReuse method testDifferentResourceContainerReuse.

@Test(timeout = 5000)
public void testDifferentResourceContainerReuse() throws Exception {
    Configuration tezConf = new Configuration(new YarnConfiguration());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
    tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
    CapturingEventHandler eventHandler = new CapturingEventHandler();
    TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
    AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
    TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
    AppContext appContext = mock(AppContext.class);
    doReturn(new Configuration(false)).when(appContext).getAMConf();
    AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
    AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
    doReturn(amContainerMap).when(appContext).getAllContainers();
    doReturn(amNodeTracker).when(appContext).getNodeTracker();
    doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
    doReturn(dagID).when(appContext).getCurrentDAGID();
    doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
    TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
    TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
    taskSchedulerManager.init(tezConf);
    taskSchedulerManager.start();
    TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
    TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
    AtomicBoolean drainNotifier = new AtomicBoolean(false);
    taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
    Resource resource1 = Resource.newInstance(1024, 1);
    Resource resource2 = Resource.newInstance(2048, 1);
    String[] host1 = { "host1" };
    String[] host2 = { "host2" };
    String[] racks = { "/default-rack" };
    Priority priority1 = Priority.newInstance(1);
    TezVertexID vertexID1 = TezVertexID.getInstance(dagID, 1);
    // Vertex 1, Task 1, Attempt 1, host1
    TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 1), 1);
    TaskAttempt ta11 = mock(TaskAttempt.class);
    AMSchedulerEventTALaunchRequest lrEvent1 = createLaunchRequestEvent(taID11, ta11, resource1, host1, racks, priority1);
    // Vertex 1, Task 2, Attempt 1, host1
    TezTaskAttemptID taID12 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 2), 1);
    TaskAttempt ta12 = mock(TaskAttempt.class);
    AMSchedulerEventTALaunchRequest lrEvent2 = createLaunchRequestEvent(taID12, ta12, resource1, host1, racks, priority1);
    // Vertex 1, Task 3, Attempt 1, host2
    TezTaskAttemptID taID13 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 3), 1);
    TaskAttempt ta13 = mock(TaskAttempt.class);
    AMSchedulerEventTALaunchRequest lrEvent3 = createLaunchRequestEvent(taID13, ta13, resource2, host2, racks, priority1);
    // Vertex 1, Task 4, Attempt 1, host2
    TezTaskAttemptID taID14 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 4), 1);
    TaskAttempt ta14 = mock(TaskAttempt.class);
    AMSchedulerEventTALaunchRequest lrEvent4 = createLaunchRequestEvent(taID14, ta14, resource2, host2, racks, priority1);
    taskSchedulerManager.handleEvent(lrEvent1);
    taskSchedulerManager.handleEvent(lrEvent2);
    taskSchedulerManager.handleEvent(lrEvent3);
    taskSchedulerManager.handleEvent(lrEvent4);
    Container container1 = createContainer(1, "host1", resource1, priority1);
    Container container2 = createContainer(2, "host2", resource2, priority1);
    // One container allocated, should start ta11
    taskScheduler.onContainersAllocated(Collections.singletonList(container1));
    TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
    drainableAppCallback.drain();
    verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
    // Second container allocated, should start ta13
    taskScheduler.onContainersAllocated(Collections.singletonList(container2));
    TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
    drainableAppCallback.drain();
    verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container2));
    // ta11 finished, should start ta12
    taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
    drainableAppCallback.drain();
    verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
    verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
    verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
    eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
    eventHandler.reset();
    // ta13 finished, should start ta14
    taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
    drainableAppCallback.drain();
    verifyDeAllocateTask(taskScheduler, ta13, true, null, null);
    verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container2));
    verify(rmClient, times(0)).releaseAssignedContainer(eq(container2.getId()));
    eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
    eventHandler.reset();
    // ta12 finished no pending requests, should release container1
    taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
    drainableAppCallback.drain();
    verifyDeAllocateTask(taskScheduler, ta12, true, null, null);
    verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
    eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
    eventHandler.reset();
    // ta14 finished no pending requests, should release container2.
    taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
    drainableAppCallback.drain();
    verifyDeAllocateTask(taskScheduler, ta14, true, null, null);
    verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
    eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
    eventHandler.reset();
    taskScheduler.shutdown();
    taskSchedulerManager.close();
}
Also used : CapturingEventHandler(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.CapturingEventHandler) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) TaskSchedulerWithDrainableContext(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerWithDrainableContext) AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) AlwaysMatchesContainerMatcher(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AlwaysMatchesContainerMatcher) Container(org.apache.hadoop.yarn.api.records.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) TezDAGID(org.apache.tez.dag.records.TezDAGID) AMRMClientAsyncForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) TaskSchedulerContextDrainable(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerContextDrainable) TezVertexID(org.apache.tez.dag.records.TezVertexID) Priority(org.apache.hadoop.yarn.api.records.Priority) AppContext(org.apache.tez.dag.app.AppContext) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) AMNodeTracker(org.apache.tez.dag.app.rm.node.AMNodeTracker) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) TaskSchedulerManagerForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerManagerForTest) CookieContainerRequest(org.apache.tez.dag.app.rm.YarnTaskSchedulerService.CookieContainerRequest) AMRMClientForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientForTest) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) TaskSchedulerManagerForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerManagerForTest) AMRMClientForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientForTest) AMRMClientAsyncForTest(org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest) Test(org.junit.Test)

Example 18 with ContainerContextMatcher

use of org.apache.tez.dag.app.rm.container.ContainerContextMatcher in project tez by apache.

the class TestVertexImpl method testVertexTaskAttemptProcessorFailure.

@SuppressWarnings("deprecation")
@Test(timeout = 5000)
public void testVertexTaskAttemptProcessorFailure() throws Exception {
    initAllVertices(VertexState.INITED);
    VertexImpl v = vertices.get("vertex1");
    startVertex(v);
    dispatcher.await();
    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
    containers.addContainerIfNew(container, 0, 0, 0);
    doReturn(containers).when(appContext).getAllContainers();
    ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId));
    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID()));
    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
    ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.APPLICATION_ERROR));
    dispatcher.await();
    Assert.assertEquals(VertexState.RUNNING, v.getState());
    Assert.assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, ta.getTerminationCause());
}
Also used : AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Example 19 with ContainerContextMatcher

use of org.apache.tez.dag.app.rm.container.ContainerContextMatcher in project tez by apache.

the class TestVertexImpl method testVertexTaskAttemptInputFailure.

@SuppressWarnings("deprecation")
@Test(timeout = 5000)
public void testVertexTaskAttemptInputFailure() throws Exception {
    initAllVertices(VertexState.INITED);
    VertexImpl v = vertices.get("vertex1");
    startVertex(v);
    dispatcher.await();
    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
    containers.addContainerIfNew(container, 0, 0, 0);
    doReturn(containers).when(appContext).getAllContainers();
    ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId));
    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID()));
    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
    ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.INPUT_READ_ERROR));
    dispatcher.await();
    Assert.assertEquals(VertexState.RUNNING, v.getState());
    Assert.assertEquals(TaskAttemptTerminationCause.INPUT_READ_ERROR, ta.getTerminationCause());
}
Also used : AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Example 20 with ContainerContextMatcher

use of org.apache.tez.dag.app.rm.container.ContainerContextMatcher in project tez by apache.

the class TestTaskAttempt method testFailure.

@Test(timeout = 5000)
public void testFailure() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
    MockEventHandler eventHandler = spy(new MockEventHandler());
    TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener();
    Configuration taskConf = new Configuration();
    taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    taskConf.setBoolean("fs.file.impl.disable.cache", true);
    taskConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
    locationHint = TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(new String[] { "127.0.0.1" })), null);
    Resource resource = Resource.newInstance(1024, 1);
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    @SuppressWarnings("deprecation") ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appCtx);
    containers.addContainerIfNew(container, 0, 0, 0);
    doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
    doReturn(containers).when(appCtx).getAllContainers();
    TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class);
    TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false);
    TezTaskAttemptID taskAttemptID = taImpl.getID();
    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
    taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId));
    taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID));
    assertEquals("Task attempt is not in the RUNNING state", taImpl.getState(), TaskAttemptState.RUNNING);
    verify(mockHeartbeatHandler).register(taskAttemptID);
    int expectedEventsAtRunning = 6;
    verify(eventHandler, times(expectedEventsAtRunning)).handle(arg.capture());
    verifyEventType(arg.getAllValues().subList(0, expectedEventsAtRunning), SpeculatorEventTaskAttemptStatusUpdate.class, 1);
    taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false)));
    taImpl.handle(new TaskAttemptEventAttemptFailed(taskAttemptID, TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "0", TaskAttemptTerminationCause.APPLICATION_ERROR));
    assertEquals("Task attempt is not in the  FAIL_IN_PROGRESS state", taImpl.getInternalState(), TaskAttemptStateInternal.FAIL_IN_PROGRESS);
    verify(mockHeartbeatHandler).unregister(taskAttemptID);
    assertEquals(1, taImpl.getDiagnostics().size());
    assertEquals("0", taImpl.getDiagnostics().get(0));
    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
    assertEquals(TaskAttemptStateInternal.FAIL_IN_PROGRESS, taImpl.getInternalState());
    taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST));
    assertFalse("InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in FAIL_IN_PROGRESS state", eventHandler.internalError);
    taImpl.handle(new TaskAttemptEventContainerTerminated(contId, taskAttemptID, "1", TaskAttemptTerminationCause.CONTAINER_EXITED));
    // verify unregister is not invoked again
    verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID);
    assertEquals(2, taImpl.getDiagnostics().size());
    assertEquals("1", taImpl.getDiagnostics().get(1));
    // err cause does not change
    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
    int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5;
    arg = ArgumentCaptor.forClass(Event.class);
    verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture());
    Event e = verifyEventType(arg.getAllValues().subList(expectedEventsAtRunning, expectedEvenstAfterTerminating), TaskEventTAFailed.class, 1);
    TaskEventTAFailed failedEvent = (TaskEventTAFailed) e;
    assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType());
    verifyEventType(arg.getAllValues().subList(expectedEventsAtRunning, expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1);
    verifyEventType(arg.getAllValues().subList(expectedEventsAtRunning, expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
    verifyEventType(arg.getAllValues().subList(expectedEventsAtRunning, expectedEvenstAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TaskAttemptEventStatusUpdate(org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) Container(org.apache.hadoop.yarn.api.records.Container) TaskAttemptEventTezEventUpdate(org.apache.tez.dag.app.dag.event.TaskAttemptEventTezEventUpdate) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) TaskHeartbeatHandler(org.apache.tez.dag.app.TaskHeartbeatHandler) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) TezVertexID(org.apache.tez.dag.records.TezVertexID) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) HashSet(java.util.HashSet) TaskAttemptEventStartedRemotely(org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TaskEventTAFailed(org.apache.tez.dag.app.dag.event.TaskEventTAFailed) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TaskAttemptEventSubmitted(org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TaskEvent(org.apache.tez.dag.app.dag.event.TaskEvent) DAGEvent(org.apache.tez.dag.app.dag.event.DAGEvent) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) TaskAttemptEvent(org.apache.tez.dag.app.dag.event.TaskAttemptEvent) Event(org.apache.hadoop.yarn.event.Event) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventContainerTerminated(org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) TaskAttemptEventAttemptFailed(org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed) Test(org.junit.Test)

Aggregations

Container (org.apache.hadoop.yarn.api.records.Container)31 ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler)31 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)31 AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap)31 ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher)31 Test (org.junit.Test)31 Configuration (org.apache.hadoop.conf.Configuration)27 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)27 Resource (org.apache.hadoop.yarn.api.records.Resource)27 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)27 ClusterInfo (org.apache.tez.dag.app.ClusterInfo)27 TezDAGID (org.apache.tez.dag.records.TezDAGID)27 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)27 TezVertexID (org.apache.tez.dag.records.TezVertexID)27 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 NodeId (org.apache.hadoop.yarn.api.records.NodeId)20 TaskAttemptEventSchedule (org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule)19 TaskAttemptEventSubmitted (org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted)19 TaskAttemptEventStartedRemotely (org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely)18 TezTaskID (org.apache.tez.dag.records.TezTaskID)17