use of org.apache.tez.dag.app.rm.node.AMNodeTracker in project tez by apache.
the class TestContainerReuse method testDifferentResourceContainerReuse.
@Test(timeout = 5000)
public void testDifferentResourceContainerReuse() throws Exception {
Configuration tezConf = new Configuration(new YarnConfiguration());
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
CapturingEventHandler eventHandler = new CapturingEventHandler();
TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
AppContext appContext = mock(AppContext.class);
doReturn(new Configuration(false)).when(appContext).getAMConf();
AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
doReturn(amContainerMap).when(appContext).getAllContainers();
doReturn(amNodeTracker).when(appContext).getNodeTracker();
doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
doReturn(dagID).when(appContext).getCurrentDAGID();
doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
taskSchedulerManager.init(tezConf);
taskSchedulerManager.start();
TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
AtomicBoolean drainNotifier = new AtomicBoolean(false);
taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
Resource resource1 = Resource.newInstance(1024, 1);
Resource resource2 = Resource.newInstance(2048, 1);
String[] host1 = { "host1" };
String[] host2 = { "host2" };
String[] racks = { "/default-rack" };
Priority priority1 = Priority.newInstance(1);
TezVertexID vertexID1 = TezVertexID.getInstance(dagID, 1);
// Vertex 1, Task 1, Attempt 1, host1
TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 1), 1);
TaskAttempt ta11 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent1 = createLaunchRequestEvent(taID11, ta11, resource1, host1, racks, priority1);
// Vertex 1, Task 2, Attempt 1, host1
TezTaskAttemptID taID12 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 2), 1);
TaskAttempt ta12 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent2 = createLaunchRequestEvent(taID12, ta12, resource1, host1, racks, priority1);
// Vertex 1, Task 3, Attempt 1, host2
TezTaskAttemptID taID13 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 3), 1);
TaskAttempt ta13 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent3 = createLaunchRequestEvent(taID13, ta13, resource2, host2, racks, priority1);
// Vertex 1, Task 4, Attempt 1, host2
TezTaskAttemptID taID14 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 4), 1);
TaskAttempt ta14 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent4 = createLaunchRequestEvent(taID14, ta14, resource2, host2, racks, priority1);
taskSchedulerManager.handleEvent(lrEvent1);
taskSchedulerManager.handleEvent(lrEvent2);
taskSchedulerManager.handleEvent(lrEvent3);
taskSchedulerManager.handleEvent(lrEvent4);
Container container1 = createContainer(1, "host1", resource1, priority1);
Container container2 = createContainer(2, "host2", resource2, priority1);
// One container allocated, should start ta11
taskScheduler.onContainersAllocated(Collections.singletonList(container1));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
// Second container allocated, should start ta13
taskScheduler.onContainersAllocated(Collections.singletonList(container2));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container2));
// ta11 finished, should start ta12
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta13 finished, should start ta14
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta13, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta13, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container2));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta12 finished no pending requests, should release container1
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta12, true, null, null);
verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
// ta14 finished no pending requests, should release container2.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta14, true, null, null);
verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
taskScheduler.shutdown();
taskSchedulerManager.close();
}
use of org.apache.tez.dag.app.rm.node.AMNodeTracker in project tez by apache.
the class DAGAppMaster method serviceInit.
@Override
public synchronized void serviceInit(final Configuration conf) throws Exception {
this.amConf = conf;
initResourceCalculatorPlugins();
this.hadoopShim = new HadoopShimsLoader(this.amConf).getHadoopShim();
long sleepTimeBeforeSecs = this.amConf.getLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, TezConstants.TEZ_DAG_SLEEP_TIME_BEFORE_EXIT);
if (sleepTimeBeforeSecs >= 0) {
this.shutdownHandler.setSleepTimeBeforeExit(sleepTimeBeforeSecs);
}
this.isLocal = conf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT);
UserPayload defaultPayload = TezUtils.createUserPayloadFromConf(amConf);
List<NamedEntityDescriptor> taskSchedulerDescriptors = Lists.newLinkedList();
List<NamedEntityDescriptor> containerLauncherDescriptors = Lists.newLinkedList();
List<NamedEntityDescriptor> taskCommunicatorDescriptors = Lists.newLinkedList();
parseAllPlugins(taskSchedulerDescriptors, taskSchedulers, containerLauncherDescriptors, containerLaunchers, taskCommunicatorDescriptors, taskCommunicators, amPluginDescriptorProto, isLocal, defaultPayload);
LOG.info(buildPluginComponentLog(taskSchedulerDescriptors, taskSchedulers, "TaskSchedulers"));
LOG.info(buildPluginComponentLog(containerLauncherDescriptors, containerLaunchers, "ContainerLaunchers"));
LOG.info(buildPluginComponentLog(taskCommunicatorDescriptors, taskCommunicators, "TaskCommunicators"));
boolean disableVersionCheck = conf.getBoolean(TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK, TezConfiguration.TEZ_AM_DISABLE_CLIENT_VERSION_CHECK_DEFAULT);
// Check client - AM version compatibility
LOG.info("Comparing client version with AM version" + ", clientVersion=" + clientVersion + ", AMVersion=" + dagVersionInfo.getVersion());
Simple2LevelVersionComparator versionComparator = new Simple2LevelVersionComparator();
if (versionComparator.compare(clientVersion, dagVersionInfo.getVersion()) != 0) {
versionMismatchDiagnostics = "Incompatible versions found" + ", clientVersion=" + clientVersion + ", AMVersion=" + dagVersionInfo.getVersion();
addDiagnostic(versionMismatchDiagnostics);
if (disableVersionCheck) {
LOG.warn("Ignoring client-AM version mismatch as check disabled. " + versionMismatchDiagnostics);
} else {
LOG.error(versionMismatchDiagnostics);
versionMismatch = true;
}
}
dispatcher = createDispatcher();
if (isLocal) {
conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS_DEFAULT);
} else {
dispatcher.enableExitOnDispatchException();
}
String strAppId = this.appAttemptID.getApplicationId().toString();
this.tezSystemStagingDir = TezCommonUtils.getTezSystemStagingPath(conf, strAppId);
context = new RunningAppContext(conf);
this.aclManager = new ACLManager(appMasterUgi.getShortUserName(), this.amConf);
clientHandler = new DAGClientHandler(this);
addIfService(dispatcher, false);
recoveryDataDir = TezCommonUtils.getRecoveryPath(tezSystemStagingDir, conf);
recoveryFS = recoveryDataDir.getFileSystem(conf);
currentRecoveryDataDir = TezCommonUtils.getAttemptRecoveryPath(recoveryDataDir, appAttemptID.getAttemptId());
if (LOG.isDebugEnabled()) {
LOG.debug("Stage directory information for AppAttemptId :" + this.appAttemptID + " tezSystemStagingDir :" + tezSystemStagingDir + " recoveryDataDir :" + recoveryDataDir + " recoveryAttemptDir :" + currentRecoveryDataDir);
}
recoveryEnabled = conf.getBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT);
clientRpcServer = new DAGClientServer(clientHandler, appAttemptID, recoveryFS);
addIfService(clientRpcServer, true);
taskHeartbeatHandler = createTaskHeartbeatHandler(context, conf);
addIfService(taskHeartbeatHandler, true);
containerHeartbeatHandler = createContainerHeartbeatHandler(context, conf);
addIfService(containerHeartbeatHandler, true);
sessionToken = TokenCache.getSessionToken(amCredentials);
if (sessionToken == null) {
throw new RuntimeException("Could not find session token in AM Credentials");
}
// Prepare the TaskAttemptListener server for authentication of Containers
// TaskAttemptListener gets the information via jobTokenSecretManager.
jobTokenSecretManager.addTokenForJob(appAttemptID.getApplicationId().toString(), sessionToken);
// service to handle requests to TaskUmbilicalProtocol
taskCommunicatorManager = createTaskCommunicatorManager(context, taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorDescriptors);
addIfService(taskCommunicatorManager, true);
containerSignatureMatcher = createContainerSignatureMatcher();
containers = new AMContainerMap(containerHeartbeatHandler, taskCommunicatorManager, containerSignatureMatcher, context);
addIfService(containers, true);
dispatcher.register(AMContainerEventType.class, containers);
nodes = new AMNodeTracker(dispatcher.getEventHandler(), context);
addIfService(nodes, true);
dispatcher.register(AMNodeEventType.class, nodes);
this.dagEventDispatcher = new DagEventDispatcher();
this.vertexEventDispatcher = new VertexEventDispatcher();
// register the event dispatchers
dispatcher.register(DAGAppMasterEventType.class, new DAGAppMasterEventHandler());
dispatcher.register(DAGEventType.class, dagEventDispatcher);
dispatcher.register(VertexEventType.class, vertexEventDispatcher);
boolean useConcurrentDispatcher = conf.getBoolean(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER_DEFAULT);
LOG.info("Using concurrent dispatcher: " + useConcurrentDispatcher);
if (!useConcurrentDispatcher) {
dispatcher.register(TaskEventType.class, new TaskEventDispatcher());
dispatcher.register(TaskAttemptEventType.class, new TaskAttemptEventDispatcher());
} else {
int concurrency = conf.getInt(TezConfiguration.TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY, TezConfiguration.TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY_DEFAULT);
AsyncDispatcherConcurrent sharedDispatcher = dispatcher.registerAndCreateDispatcher(TaskEventType.class, new TaskEventDispatcher(), "TaskAndAttemptEventThread", concurrency);
dispatcher.registerWithExistingDispatcher(TaskAttemptEventType.class, new TaskAttemptEventDispatcher(), sharedDispatcher);
}
// register other delegating dispatchers
dispatcher.registerAndCreateDispatcher(SpeculatorEventType.class, new SpeculatorEventHandler(), "Speculator");
if (enableWebUIService()) {
this.webUIService = new WebUIService(context);
addIfService(webUIService, false);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Web UI Service is not enabled.");
}
}
this.taskSchedulerManager = createTaskSchedulerManager(taskSchedulerDescriptors);
addIfService(taskSchedulerManager, true);
if (enableWebUIService()) {
addIfServiceDependency(taskSchedulerManager, webUIService);
}
dispatcher.register(AMSchedulerEventType.class, taskSchedulerManager);
addIfServiceDependency(taskSchedulerManager, clientRpcServer);
this.containerLauncherManager = createContainerLauncherManager(containerLauncherDescriptors, isLocal);
addIfService(containerLauncherManager, true);
dispatcher.register(ContainerLauncherEventType.class, containerLauncherManager);
historyEventHandler = createHistoryEventHandler(context);
addIfService(historyEventHandler, true);
this.sessionTimeoutInterval = TezCommonUtils.getDAGSessionTimeout(amConf);
this.clientAMHeartbeatTimeoutIntervalMillis = TezCommonUtils.getAMClientHeartBeatTimeoutMillis(amConf);
if (!versionMismatch) {
if (isSession) {
FileInputStream sessionResourcesStream = null;
try {
sessionResourcesStream = new FileInputStream(new File(workingDirectory, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME));
PlanLocalResourcesProto amLocalResourceProto = PlanLocalResourcesProto.parseDelimitedFrom(sessionResourcesStream);
amResources.putAll(DagTypeConverters.convertFromPlanLocalResources(amLocalResourceProto));
} finally {
if (sessionResourcesStream != null) {
sessionResourcesStream.close();
}
}
}
}
rawExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setDaemon(true).setNameFormat("App Shared Pool - " + "#%d").build());
execService = MoreExecutors.listeningDecorator(rawExecutor);
initServices(conf);
super.serviceInit(conf);
if (!versionMismatch) {
if (this.appAttemptID.getAttemptId() == 1) {
AppLaunchedEvent appLaunchedEvent = new AppLaunchedEvent(appAttemptID.getApplicationId(), startTime, appSubmitTime, appMasterUgi.getShortUserName(), this.amConf, dagVersionInfo);
historyEventHandler.handle(new DAGHistoryEvent(appLaunchedEvent));
}
AMLaunchedEvent launchedEvent = new AMLaunchedEvent(appAttemptID, startTime, appSubmitTime, appMasterUgi.getShortUserName());
historyEventHandler.handle(new DAGHistoryEvent(launchedEvent));
this.state = DAGAppMasterState.INITED;
} else {
this.state = DAGAppMasterState.ERROR;
}
}
use of org.apache.tez.dag.app.rm.node.AMNodeTracker in project tez by apache.
the class TestContainerReuse method testDelayedReuseContainerBecomesAvailable.
@Test(timeout = 15000l)
public void testDelayedReuseContainerBecomesAvailable() throws IOException, InterruptedException, ExecutionException {
LOG.info("Test testDelayedReuseContainerBecomesAvailable");
Configuration conf = new Configuration(new YarnConfiguration());
conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, false);
conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false);
conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 3000l);
conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
CapturingEventHandler eventHandler = new CapturingEventHandler();
TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
AppContext appContext = mock(AppContext.class);
doReturn(conf).when(appContext).getAMConf();
AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
doReturn(amContainerMap).when(appContext).getAllContainers();
doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
doReturn(amNodeTracker).when(appContext).getNodeTracker();
doReturn(dagID).when(appContext).getCurrentDAGID();
doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(conf));
TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
taskSchedulerManager.init(conf);
taskSchedulerManager.start();
TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
AtomicBoolean drainNotifier = new AtomicBoolean(false);
taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
Resource resource = Resource.newInstance(1024, 1);
Priority priority = Priority.newInstance(5);
String[] host1 = { "host1" };
String[] host2 = { "host2" };
String[] defaultRack = { "/default-rack" };
TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID, 1), 1);
TezTaskAttemptID taID21 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID, 2), 1);
TezTaskAttemptID taID31 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID, 3), 1);
TaskAttempt ta11 = mock(TaskAttempt.class);
TaskAttempt ta21 = mock(TaskAttempt.class);
TaskAttempt ta31 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrTa11 = createLaunchRequestEvent(taID11, ta11, resource, host1, defaultRack, priority);
AMSchedulerEventTALaunchRequest lrTa21 = createLaunchRequestEvent(taID21, ta21, resource, host2, defaultRack, priority);
AMSchedulerEventTALaunchRequest lrTa31 = createLaunchRequestEvent(taID31, ta31, resource, host1, defaultRack, priority);
taskSchedulerManager.handleEvent(lrTa11);
taskSchedulerManager.handleEvent(lrTa21);
Container containerHost1 = createContainer(1, host1[0], resource, priority);
Container containerHost2 = createContainer(2, host2[0], resource, priority);
taskScheduler.onContainersAllocated(Lists.newArrayList(containerHost1, containerHost2));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(containerHost1));
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta21), any(Object.class), eq(containerHost2));
// Adding the event later so that task1 assigned to containerHost1
// is deterministic.
taskSchedulerManager.handleEvent(lrTa31);
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, containerHost1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
verify(taskSchedulerManager, times(1)).taskAllocated(eq(0), eq(ta31), any(Object.class), eq(containerHost1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(containerHost1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta21, containerHost2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
long currentTs = System.currentTimeMillis();
Throwable exception = null;
while (System.currentTimeMillis() < currentTs + 5000l) {
try {
verify(taskSchedulerManager, times(1)).containerBeingReleased(eq(0), eq(containerHost2.getId()));
exception = null;
break;
} catch (Throwable e) {
exception = e;
}
}
assertTrue("containerHost2 was not released", exception == null);
taskScheduler.shutdown();
taskSchedulerManager.close();
}
use of org.apache.tez.dag.app.rm.node.AMNodeTracker in project tez by apache.
the class TestContainerReuse method testReuseNonLocalRequest.
@Test(timeout = 30000l)
public void testReuseNonLocalRequest() throws IOException, InterruptedException, ExecutionException {
LOG.info("Test testReuseNonLocalRequest");
Configuration tezConf = new Configuration(new YarnConfiguration());
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, true);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100l);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 1000l);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 1000l);
CapturingEventHandler eventHandler = new CapturingEventHandler();
TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
AppContext appContext = mock(AppContext.class);
doReturn(new Configuration(false)).when(appContext).getAMConf();
AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
doReturn(amContainerMap).when(appContext).getAllContainers();
doReturn(amNodeTracker).when(appContext).getNodeTracker();
doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
doReturn(dagID).when(appContext).getCurrentDAGID();
doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
taskSchedulerManager.init(tezConf);
taskSchedulerManager.start();
TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
AtomicBoolean drainNotifier = new AtomicBoolean(false);
taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
Resource resource1 = Resource.newInstance(1024, 1);
String[] emptyHosts = new String[0];
String[] racks = { "default-rack" };
Priority priority = Priority.newInstance(3);
TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
// Vertex 1, Task 1, Attempt 1, no locality information.
TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID, 1), 1);
TaskAttempt ta11 = mock(TaskAttempt.class);
doReturn(vertexID).when(ta11).getVertexID();
AMSchedulerEventTALaunchRequest lrEvent11 = createLaunchRequestEvent(taID11, ta11, resource1, emptyHosts, racks, priority);
// Vertex1, Task2, Attempt 1, no locality information.
TezTaskAttemptID taID12 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID, 2), 1);
TaskAttempt ta12 = mock(TaskAttempt.class);
doReturn(vertexID).when(ta12).getVertexID();
AMSchedulerEventTALaunchRequest lrEvent12 = createLaunchRequestEvent(taID12, ta12, resource1, emptyHosts, racks, priority);
// Send launch request for task 1 only, deterministic assignment to this task.
taskSchedulerManager.handleEvent(lrEvent11);
Container container1 = createContainer(1, "randomHost", resource1, priority);
// One container allocated.
taskScheduler.onContainersAllocated(Collections.singletonList(container1));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
// Send launch request for task2 (vertex2)
taskSchedulerManager.handleEvent(lrEvent12);
// Task assigned to container completed successfully.
// Container should not be immediately assigned to task 2
// until delay expires.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
// TA12 completed.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
taskScheduler.shutdown();
taskSchedulerManager.close();
}
use of org.apache.tez.dag.app.rm.node.AMNodeTracker in project tez by apache.
the class TestContainerReuse method testSimpleReuse.
@Test(timeout = 10000l)
public void testSimpleReuse() throws IOException, InterruptedException, ExecutionException {
LOG.info("Test testSimpleReuse");
Configuration tezConf = new Configuration(new YarnConfiguration());
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0);
CapturingEventHandler eventHandler = new CapturingEventHandler();
TezDAGID dagID = TezDAGID.getInstance("0", 0, 0);
AMRMClient<CookieContainerRequest> rmClientCore = new AMRMClientForTest();
TezAMRMClientAsync<CookieContainerRequest> rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100));
AppContext appContext = mock(AppContext.class);
doReturn(new Configuration(false)).when(appContext).getAMConf();
AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext);
AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext);
doReturn(amContainerMap).when(appContext).getAllContainers();
doReturn(amNodeTracker).when(appContext).getNodeTracker();
doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState();
doReturn(dagID).when(appContext).getCurrentDAGID();
doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo();
TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf));
TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal);
taskSchedulerManager.init(tezConf);
taskSchedulerManager.start();
TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler();
TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback();
AtomicBoolean drainNotifier = new AtomicBoolean(false);
taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;
Resource resource1 = Resource.newInstance(1024, 1);
String[] host1 = { "host1" };
String[] host2 = { "host2" };
String[] racks = { "/default-rack" };
Priority priority1 = Priority.newInstance(1);
TezVertexID vertexID1 = TezVertexID.getInstance(dagID, 1);
// Vertex 1, Task 1, Attempt 1, host1
TezTaskAttemptID taID11 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 1), 1);
TaskAttempt ta11 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent1 = createLaunchRequestEvent(taID11, ta11, resource1, host1, racks, priority1);
// Vertex 1, Task 2, Attempt 1, host1
TezTaskAttemptID taID12 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 2), 1);
TaskAttempt ta12 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent2 = createLaunchRequestEvent(taID12, ta12, resource1, host1, racks, priority1);
// Vertex 1, Task 3, Attempt 1, host2
TezTaskAttemptID taID13 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 3), 1);
TaskAttempt ta13 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent3 = createLaunchRequestEvent(taID13, ta13, resource1, host2, racks, priority1);
// Vertex 1, Task 4, Attempt 1, host2
TezTaskAttemptID taID14 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID1, 4), 1);
TaskAttempt ta14 = mock(TaskAttempt.class);
AMSchedulerEventTALaunchRequest lrEvent4 = createLaunchRequestEvent(taID14, ta14, resource1, host2, racks, priority1);
taskSchedulerManager.handleEvent(lrEvent1);
taskSchedulerManager.handleEvent(lrEvent2);
taskSchedulerManager.handleEvent(lrEvent3);
taskSchedulerManager.handleEvent(lrEvent4);
Container container1 = createContainer(1, "host1", resource1, priority1);
// One container allocated.
taskScheduler.onContainersAllocated(Collections.singletonList(container1));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1));
// Task assigned to container completed successfully. Container should be re-used.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta11, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// Task assigned to container completed successfully.
// Verify reuse across hosts.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta12, true, null, null);
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container1));
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class);
eventHandler.reset();
// Verify no re-use if a previous task fails.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, null, "TIMEOUT", 0));
drainableAppCallback.drain();
verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container1));
verifyDeAllocateTask(taskScheduler, ta13, false, null, "TIMEOUT");
verify(rmClient).releaseAssignedContainer(eq(container1.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
Container container2 = createContainer(2, "host2", resource1, priority1);
// Second container allocated. Should be allocated to the last task.
taskScheduler.onContainersAllocated(Collections.singletonList(container2));
TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
drainableAppCallback.drain();
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container2));
// Task assigned to container completed successfully. No pending requests. Container should be released.
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta14, container2.getId(), TaskAttemptState.SUCCEEDED, null, null, 0));
drainableAppCallback.drain();
verifyDeAllocateTask(taskScheduler, ta14, true, null, null);
verify(rmClient).releaseAssignedContainer(eq(container2.getId()));
eventHandler.verifyInvocation(AMContainerEventStopRequest.class);
eventHandler.reset();
taskScheduler.shutdown();
taskSchedulerManager.close();
}
Aggregations