use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestContainerReuse method testDifferentResourceContainerReuse.
@Test(timeout = 5000)
public void testDifferentResourceContainerReuse() throws Exception {
Configuration tezConf = new Configuration(new YarnConfiguration());
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
CapturingEventHandler eventHandler = new CapturingEventHandler();
TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
AppContext appContext = mock(AppContext.class);
doReturn(new Configuration(false)).when(appContext).getAMConf();
AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
doReturn(amContainerMap).when(appContext).getAllContainers();
doReturn(amNodeTracker).when(appContext).getNodeTracker();
doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
doReturn(dagID).when(appContext).getCurrentDAGID();
doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
taskSchedulerManager.init(tezConf);
taskSchedulerManager.start();
TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
AtomicBoolean drainNotifier = new AtomicBoolean(false);
taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
Resource resource1 = Resource.newInstance(1024, 1);
Resource resource2 = Resource.newInstance(2048, 1);
String[] host1 = { "host1" };
String[] host2 = { "host2" };
String[] racks = { "/default-rack" };
Priority priority1 = Priority.newInstance(1);
TezVertexID vertexID1 = TezVertexID.getInstance(dagID, 1);
// Vertex 1, Task 1, Attempt 1, host1
TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 1), 1);
TaskAttempt ta11 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent1 = createLaunchRequestEvent(taID11, ta11, resource1, host1, racks, priority1);
// Vertex 1, Task 2, Attempt 1, host1
TezTaskAttemptID taID12 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 2), 1);
TaskAttempt ta12 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent2 = createLaunchRequestEvent(taID12, ta12, resource1, host1, racks, priority1);
// Vertex 1, Task 3, Attempt 1, host2
TezTaskAttemptID taID13 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 3), 1);
TaskAttempt ta13 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent3 = createLaunchRequestEvent(taID13, ta13, resource2, host2, racks, priority1);
// Vertex 1, Task 4, Attempt 1, host2
TezTaskAttemptID taID14 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 4), 1);
TaskAttempt ta14 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent4 = createLaunchRequestEvent(taID14, ta14, resource2, host2, racks, priority1);
taskSchedulerManager.handleEvent(lrEvent1);
taskSchedulerManager.handleEvent(lrEvent2);
taskSchedulerManager.handleEvent(lrEvent3);
taskSchedulerManager.handleEvent(lrEvent4);
Container container1 = createContainer(1, "host1", resource1, priority1);
Container container2 = createContainer(2, "host2", resource2, priority1);
// One container allocated, should start ta11
taskScheduler.onContainersAllocated(Collections.singletonList(container1));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
// Second container allocated, should start ta13
taskScheduler.onContainersAllocated(Collections.singletonList(container2));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container2));
// ta11 finished, should start ta12
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta13 finished, should start ta14
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta13, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container2));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta12 finished no pending requests, should release container1
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta12, true, null, null);
verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta14 finished no pending requests, should release container2.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta14, true, null, null);
verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
taskScheduler.shutdown();
taskSchedulerManager.close();
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestRecoveryParser method testRecoverableSummary_VertexGroupInCommitting.
@Test(timeout = 5000)
public void testRecoverableSummary_VertexGroupInCommitting() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(mockDAGImpl.getID()).thenReturn(dagID);
when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
when(appContext.getApplicationID()).thenReturn(appId);
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// write a DAGSubmittedEvent first to initialize summaryStream
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
// It should be fine to skip other events, just for testing.
rService.handle(new DAGHistoryEvent(dagID, new VertexGroupCommitStartedEvent(dagID, "group_1", Lists.newArrayList(TezVertexID.getInstance(dagID, 0), TezVertexID.getInstance(dagID, 1)), 0L)));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertEquals(dagID, dagData.recoveredDagID);
assertTrue(dagData.nonRecoverable);
assertTrue(dagData.reason.contains("Vertex Group Commit was in progress"));
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestRecoveryParser method testRecoverableSummary_DAGInCommitting.
@Test(timeout = 5000)
public void testRecoverableSummary_DAGInCommitting() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(mockDAGImpl.getID()).thenReturn(dagID);
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// write a DAGSubmittedEvent first to initialize summaryStream
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
// It should be fine to skip other events, just for testing.
rService.handle(new DAGHistoryEvent(dagID, new DAGCommitStartedEvent(dagID, 0L)));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertEquals(dagID, dagData.recoveredDagID);
assertTrue(dagData.nonRecoverable);
assertTrue(dagData.reason.contains("DAG Commit was in progress"));
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestRecoveryParser method testRecoverableNonSummary1.
@Test(timeout = 5000)
public void testRecoverableNonSummary1() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(mockDAGImpl.getID()).thenReturn(dagID);
when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
when(appContext.getApplicationID()).thenReturn(appId);
// MockRecoveryService will skip the non-summary event
MockRecoveryService rService = new MockRecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// write a DAGSubmittedEvent first to initialize summaryStream
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
// It should be fine to skip other events, just for testing.
TezVertexID vertexId = TezVertexID.getInstance(dagID, 0);
rService.handle(new DAGHistoryEvent(dagID, new VertexCommitStartedEvent(vertexId, 0L)));
rService.handle(new DAGHistoryEvent(dagID, new VertexFinishedEvent(vertexId, "v1", 10, 0L, 0L, 0L, 0L, 0L, VertexState.SUCCEEDED, "", null, null, null, null)));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertTrue(dagData.nonRecoverable);
assertTrue(dagData.reason.contains("Vertex has been committed, but its full recovery events are not seen"));
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestRecoveryParser method testSkipAllOtherEvents_1.
// skipAllOtherEvents due to non-recoverable (in the middle of commit)
@Test(timeout = 5000)
public void testSkipAllOtherEvents_1() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// write data in attempt_1
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
rService.handle(new DAGHistoryEvent(dagID, new DAGInitializedEvent(dagID, 1L, "user", dagPlan.getName(), null)));
// only for testing, DAGCommitStartedEvent is not supposed to happen at this time.
rService.handle(new DAGHistoryEvent(dagID, new DAGCommitStartedEvent(dagID, System.currentTimeMillis())));
rService.stop();
// write data in attempt_2
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/2"));
rService = new RecoveryService(appContext);
rService.init(conf);
rService.start();
// only for testing, DAGStartedEvent is not supposed to happen at this time.
rService.handle(new DAGHistoryEvent(dagID, new DAGStartedEvent(dagID, 1L, "user", "dag1")));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertEquals(true, dagData.nonRecoverable);
assertTrue(dagData.reason.contains("DAG Commit was in progress, not recoverable,"));
// DAGSubmittedEvent is handled but DAGInitializedEvent and DAGStartedEvent in the next attempt are both skipped
// due to the dag is not recoerable.
verify(mockAppMaster).createDAG(any(DAGPlan.class), any(TezDAGID.class));
assertNull(dagData.getDAGInitializedEvent());
assertNull(dagData.getDAGStartedEvent());
}
Aggregations