Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId in project hadoop by apache.
From the class TestContainerLauncher, method testPoolSize.
@Test(timeout = 10000)
public void testPoolSize() throws InterruptedException {
  ApplicationId appId = ApplicationId.newInstance(12345, 67);
  ApplicationAttemptId appAttemptId =
      ApplicationAttemptId.newInstance(appId, 3);
  JobId jobId = MRBuilderUtils.newJobId(appId, 8);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 9, TaskType.MAP);

  AppContext context = mock(AppContext.class);
  CustomContainerLauncher containerLauncher =
      new CustomContainerLauncher(context);
  containerLauncher.init(new Configuration());
  containerLauncher.start();

  ThreadPoolExecutor threadPool = containerLauncher.getThreadPool();

  // No events yet
  Assert.assertEquals(containerLauncher.initialPoolSize,
      MRJobConfig.DEFAULT_MR_AM_CONTAINERLAUNCHER_THREADPOOL_INITIAL_SIZE);
  Assert.assertEquals(0, threadPool.getPoolSize());
  Assert.assertEquals(containerLauncher.initialPoolSize,
      threadPool.getCorePoolSize());
  Assert.assertNull(containerLauncher.foundErrors);

  containerLauncher.expectedCorePoolSize = containerLauncher.initialPoolSize;
  for (int i = 0; i < 10; i++) {
    ContainerId containerId = ContainerId.newContainerId(appAttemptId, i);
    TaskAttemptId taskAttemptId = MRBuilderUtils.newTaskAttemptId(taskId, i);
    containerLauncher.handle(new ContainerLauncherEvent(taskAttemptId,
        containerId, "host" + i + ":1234", null,
        ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH));
  }
  waitForEvents(containerLauncher, 10);
  Assert.assertEquals(10, threadPool.getPoolSize());
  Assert.assertNull(containerLauncher.foundErrors);

  // Same set of hosts, so no change
  containerLauncher.finishEventHandling = true;
  int timeOut = 0;
  while (containerLauncher.numEventsProcessed.get() < 10 && timeOut++ < 200) {
    LOG.info("Waiting for number of events processed to become " + 10
        + ". It is now " + containerLauncher.numEventsProcessed.get()
        + ". Timeout is " + timeOut);
    Thread.sleep(1000);
  }
  Assert.assertEquals(10, containerLauncher.numEventsProcessed.get());
  containerLauncher.finishEventHandling = false;
  for (int i = 0; i < 10; i++) {
    ContainerId containerId =
        ContainerId.newContainerId(appAttemptId, i + 10);
    TaskAttemptId taskAttemptId =
        MRBuilderUtils.newTaskAttemptId(taskId, i + 10);
    containerLauncher.handle(new ContainerLauncherEvent(taskAttemptId,
        containerId, "host" + i + ":1234", null,
        ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH));
  }
  waitForEvents(containerLauncher, 20);
  Assert.assertEquals(10, threadPool.getPoolSize());
  Assert.assertNull(containerLauncher.foundErrors);

  // Different hosts, so there should be an increase in the core pool size
  // to 21 (11 hosts + 10 buffer). The core pool size should be 21, but the
  // live pool size should be only 11.
  containerLauncher.expectedCorePoolSize =
      11 + containerLauncher.initialPoolSize;
  containerLauncher.finishEventHandling = false;
  ContainerId containerId = ContainerId.newContainerId(appAttemptId, 21);
  TaskAttemptId taskAttemptId = MRBuilderUtils.newTaskAttemptId(taskId, 21);
  containerLauncher.handle(new ContainerLauncherEvent(taskAttemptId,
      containerId, "host11:1234", null,
      ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH));
  waitForEvents(containerLauncher, 21);
  Assert.assertEquals(11, threadPool.getPoolSize());
  Assert.assertNull(containerLauncher.foundErrors);

  containerLauncher.stop();

  // Change the configuration MR_AM_CONTAINERLAUNCHER_THREADPOOL_INITIAL_SIZE
  // and verify the initialPoolSize value.
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREADPOOL_INITIAL_SIZE, 20);
  containerLauncher = new CustomContainerLauncher(context);
  containerLauncher.init(conf);
  Assert.assertEquals(containerLauncher.initialPoolSize, 20);
}
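Everything in this test hangs off the ID hierarchy that MRBuilderUtils fabricates: a YARN ApplicationId is wrapped into a JobId, then a TaskId, then a TaskAttemptId, while ContainerIds hang off the ApplicationAttemptId. Below is a minimal, self-contained sketch of that wiring, using only the factory calls that appear in the test (the class name and printed output are illustrative):

import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;

public class TaskAttemptIdSketch {
  public static void main(String[] args) {
    // Cluster timestamp + app sequence number -> ApplicationId
    ApplicationId appId = ApplicationId.newInstance(12345, 67);
    ApplicationAttemptId appAttemptId =
        ApplicationAttemptId.newInstance(appId, 3);
    // The MR-level IDs are derived from the YARN ApplicationId
    JobId jobId = MRBuilderUtils.newJobId(appId, 8);
    TaskId taskId = MRBuilderUtils.newTaskId(jobId, 9, TaskType.MAP);
    TaskAttemptId taskAttemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
    // Containers are identified relative to the application attempt
    ContainerId containerId = ContainerId.newContainerId(appAttemptId, 0);
    System.out.println(taskAttemptId + " runs in " + containerId);
  }
}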
Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId in project hadoop by apache.
From the class TestContainerLauncher, method testSlowNM.
@Test(timeout = 15000)
public void testSlowNM() throws Exception {
  conf = new Configuration();
  int maxAttempts = 1;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  // Set the timeout low for the test
  conf.setInt("yarn.rpc.nm-command-timeout", 3000);
  conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class.getName());

  YarnRPC rpc = YarnRPC.create(conf);
  String bindAddr = "localhost:0";
  InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr);
  NMTokenSecretManagerInNM tokenSecretManager = new NMTokenSecretManagerInNM();
  MasterKey masterKey = Records.newRecord(MasterKey.class);
  masterKey.setBytes(ByteBuffer.wrap("key".getBytes()));
  tokenSecretManager.setMasterKey(masterKey);
  conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
      "token");
  server = rpc.getServer(ContainerManagementProtocol.class,
      new DummyContainerManager(), addr, conf, tokenSecretManager, 1);
  server.start();

  MRApp app = new MRAppWithSlowNM(tokenSecretManager);
  try {
    Job job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);

    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("Num tasks is not correct", 1, tasks.size());

    Task task = tasks.values().iterator().next();
    app.waitForState(task, TaskState.SCHEDULED);

    Map<TaskAttemptId, TaskAttempt> attempts =
        tasks.values().iterator().next().getAttempts();
    Assert.assertEquals("Num attempts is not correct", maxAttempts,
        attempts.size());

    TaskAttempt attempt = attempts.values().iterator().next();
    app.waitForInternalState((TaskAttemptImpl) attempt,
        TaskAttemptStateInternal.ASSIGNED);
    app.waitForState(job, JobState.FAILED);

    String diagnostics = attempt.getDiagnostics().toString();
    LOG.info("attempt.getDiagnostics: " + diagnostics);
    Assert.assertTrue(diagnostics.contains("Container launch failed for "
        + "container_0_0000_01_000000 : "));
    Assert.assertTrue(diagnostics.contains(
        "java.net.SocketTimeoutException: 3000 millis timeout while waiting for channel"));
  } finally {
    server.stop();
    app.stop();
  }
}
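The assertion on java.net.SocketTimeoutException is driven by the 3000 ms yarn.rpc.nm-command-timeout set above: the slow NM accepts the launch call but never answers, so the client-side channel read times out. Here is a plain-JDK sketch of that failure mode (not Hadoop RPC code, just java.net, with all names hypothetical):

import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketTimeoutException;

public class SlowServerTimeoutSketch {
  public static void main(String[] args) throws Exception {
    // A "slow NM": accepts the connection but never writes a response.
    try (ServerSocket server = new ServerSocket(0)) {
      Thread slow = new Thread(() -> {
        try {
          Socket held = server.accept(); // hold the connection open
          Thread.sleep(10_000);          // never respond within the timeout
          held.close();
        } catch (Exception ignored) {
          // sketch only
        }
      });
      slow.setDaemon(true);
      slow.start();

      try (Socket client = new Socket("localhost", server.getLocalPort())) {
        client.setSoTimeout(3000); // analogous to yarn.rpc.nm-command-timeout
        InputStream in = client.getInputStream();
        in.read(); // blocks until the 3000 ms timeout fires
      } catch (SocketTimeoutException expected) {
        System.out.println("Timed out as expected: " + expected.getMessage());
      }
    }
  }
}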
Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId in project hadoop by apache.
From the class TestRMContainerAllocator, method testExcludeSchedReducesFromHeadroom.
/**
 * Tests whether scheduled reducers are excluded while calculating headroom.
 */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
  LOG.info("Running testExcludeSchedReducesFromHeadroom");
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher =
      (DrainDispatcher) rm.getRMContext().getDispatcher();

  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();
  MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();
  ApplicationAttemptId appAttemptId =
      app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();

  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  Job mockJob = mock(Job.class);
  when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId,
      "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null,
      false, ""));
  Task mockTask = mock(Task.class);
  TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
  when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
  when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
  when(mockTaskAttempt.getProgress()).thenReturn(0.01f);

  MyContainerAllocator allocator =
      new MyContainerAllocator(rm, conf, appAttemptId, mockJob);

  // Register nodes to RM.
  MockNM nodeManager = rm.registerNode("h1:1234", 4096);
  dispatcher.await();
  MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
  dispatcher.await();

  // Request 2 maps and 1 reducer (some on nodes which are not registered).
  ContainerRequestEvent event1 =
      createReq(jobId, 1, 1024, new String[] { "h1" });
  allocator.sendRequest(event1);
  ContainerRequestEvent event2 =
      createReq(jobId, 2, 1024, new String[] { "h2" });
  allocator.sendRequest(event2);
  ContainerRequestEvent event3 =
      createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
  allocator.sendRequest(event3);

  // This will tell the scheduler about the requests, but there will be no
  // allocations as the nodes have not been added yet.
  allocator.schedule();
  dispatcher.await();

  // Request another reducer on h3, which has not registered.
  ContainerRequestEvent event4 =
      createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
  allocator.sendRequest(event4);
  allocator.schedule();
  dispatcher.await();

  // Update resources in the scheduler through a node heartbeat from h1.
  nodeManager.nodeHeartbeat(true);
  dispatcher.await();
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
  allocator.schedule();
  dispatcher.await();

  // Two maps are assigned.
  Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
  // Send deallocate requests for the maps so that no maps are assigned
  // after this.
  ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
  allocator.sendDeallocate(deallocate1);
  ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
  allocator.sendDeallocate(deallocate2);
  // No map should be assigned.
  Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());

  nodeManager.nodeHeartbeat(true);
  dispatcher.await();
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
  allocator.schedule();
  dispatcher.await();

  // h2 heartbeats.
  nodeManager2.nodeHeartbeat(true);
  dispatcher.await();

  // Send a request for one more mapper.
  ContainerRequestEvent event5 =
      createReq(jobId, 5, 1024, new String[] { "h1" });
  allocator.sendRequest(event5);
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
  allocator.schedule();
  dispatcher.await();

  // One reducer is assigned and one map is scheduled.
  Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
  Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());

  // The headroom is enough to run a mapper if taken as is, but won't be
  // enough once the scheduled reducers' resources are deducted.
  rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
  allocator.schedule();
  dispatcher.await();

  // After the allocate response, the one assigned reducer is preempted
  // and killed.
  Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
  Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC,
      MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
  Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
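The key step is the last forceResourceLimit(1260 MB): the raw headroom still fits one 1024 MB mapper, but once the resources of the scheduled reducer are deducted there is no room left, so the allocator ramps the reducer down. A hypothetical helper sketching that arithmetic (adjustedHeadroomMb is illustrative, not the allocator's actual method):

public class HeadroomSketch {
  // Reported headroom minus resources already earmarked for scheduled
  // (not yet assigned) reducers; hypothetical, mirrors the test's intent.
  static long adjustedHeadroomMb(long reportedHeadroomMb,
      int scheduledReduces, long reduceMb) {
    return reportedHeadroomMb - (long) scheduledReduces * reduceMb;
  }

  public static void main(String[] args) {
    long reported = 1260; // the forced limit in the test
    long mapMb = 1024;    // each map request in the test
    long adjusted = adjustedHeadroomMb(reported, 1, 1024);
    System.out.println("raw headroom fits a map: "
        + (reported >= mapMb));   // true
    System.out.println("adjusted headroom fits a map: "
        + (adjusted >= mapMb));   // false -> reducer is ramped down
  }
}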
Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId in project hadoop by apache.
From the class TestRMContainerAllocator, method testUpdatedNodes.
@Test
public void testUpdatedNodes() throws Exception {
  Configuration conf = new Configuration();
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher =
      (DrainDispatcher) rm.getRMContext().getDispatcher();

  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();
  MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();
  ApplicationAttemptId appAttemptId =
      app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();

  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  Job mockJob = mock(Job.class);
  MyContainerAllocator allocator =
      new MyContainerAllocator(rm, conf, appAttemptId, mockJob);

  // Add resources to the scheduler
  MockNM nm1 = rm.registerNode("h1:1234", 10240);
  MockNM nm2 = rm.registerNode("h2:1234", 10240);
  dispatcher.await();

  // Create the map container request
  ContainerRequestEvent event =
      createReq(jobId, 1, 1024, new String[] { "h1" });
  allocator.sendRequest(event);
  TaskAttemptId attemptId = event.getAttemptID();
  TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
  when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
  Task mockTask = mock(Task.class);
  when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
  when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);

  // This tells the scheduler about the requests
  List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
  dispatcher.await();

  nm1.nodeHeartbeat(true);
  dispatcher.await();
  Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
  Assert.assertEquals(3,
      allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
  allocator.getJobUpdatedNodeEvents().clear();

  // Get the assignment
  assigned = allocator.schedule();
  dispatcher.await();
  Assert.assertEquals(1, assigned.size());
  Assert.assertEquals(nm1.getNodeId(),
      assigned.get(0).getContainer().getNodeId());
  // No updated nodes reported
  Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
  Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());

  // Mark the nodes bad
  nm1.nodeHeartbeat(false);
  nm2.nodeHeartbeat(false);
  dispatcher.await();

  // The schedule response returns the updated nodes
  assigned = allocator.schedule();
  dispatcher.await();
  Assert.assertEquals(0, assigned.size());
  // Updated nodes are reported
  Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
  Assert.assertEquals(1, allocator.getTaskAttemptKillEvents().size());
  Assert.assertEquals(2,
      allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
  Assert.assertEquals(attemptId,
      allocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
  allocator.getJobUpdatedNodeEvents().clear();
  allocator.getTaskAttemptKillEvents().clear();

  assigned = allocator.schedule();
  dispatcher.await();
  Assert.assertEquals(0, assigned.size());
  // No updated nodes reported
  Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
  Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
}
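The JobUpdatedNodeEvents counted above originate from the updated-node reports the RM piggybacks on allocate responses; attempts running on nodes that turned unusable are then killed. A sketch of scanning those reports, assuming only the public YARN records API:

import java.util.List;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;

public class UpdatedNodesSketch {
  // Collect nodes from an allocate response that became unusable; this is
  // the condition that drives the kill events the test asserts on.
  static void reportUnusable(AllocateResponse response) {
    List<NodeReport> updated = response.getUpdatedNodes();
    for (NodeReport report : updated) {
      NodeState state = report.getNodeState();
      if (state.isUnusable()) { // UNHEALTHY, DECOMMISSIONED, LOST
        System.out.println("Node became unusable: " + report.getNodeId());
      }
    }
  }
}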
Use of org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId in project hadoop by apache.
From the class TestTaskAttempt, method testAppDiognosticEventOnNewTask.
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId =
      ApplicationAttemptId.newInstance(appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress())
      .thenReturn(new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemorySize()).thenReturn(1024L);
  setupTaskAttemptFinishingMonitor(eventHandler, jobConf, appCtx);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(),
      SystemClock.getInstance(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId,
      "Task got killed"));
  assertFalse("InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE"
      + " on assigned task", eventHandler.internalError);
}
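The final assertFalse hinges on MockEventHandler flagging any JobEventType.INTERNAL_ERROR that reaches the dispatcher. A sketch of such a flagging handler (the real MockEventHandler in TestTaskAttempt may differ in detail):

import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;

// Records whether a JobEventType.INTERNAL_ERROR flowed through the
// dispatcher; the test asserts this stays false after the diagnostics
// update is handled on a NEW task attempt.
public class FlaggingEventHandler implements EventHandler<Event> {
  public volatile boolean internalError;

  @Override
  public void handle(Event event) {
    if (event instanceof JobEvent
        && ((JobEvent) event).getType() == JobEventType.INTERNAL_ERROR) {
      internalError = true;
    }
  }
}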