use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestLocalContainerAllocator method testAllocatedContainerResourceIsNotNull.
@Test
public void testAllocatedContainerResourceIsNotNull() {
ArgumentCaptor<TaskAttemptContainerAssignedEvent> containerAssignedCaptor = ArgumentCaptor.forClass(TaskAttemptContainerAssignedEvent.class);
@SuppressWarnings("unchecked") EventHandler<Event> eventHandler = mock(EventHandler.class);
AppContext context = mock(AppContext.class);
when(context.getEventHandler()).thenReturn(eventHandler);
ContainerId containerId = ContainerId.fromString("container_1427562107907_0002_01_000001");
LocalContainerAllocator containerAllocator = new LocalContainerAllocator(mock(ClientService.class), context, "localhost", -1, -1, containerId);
ContainerAllocatorEvent containerAllocatorEvent = createContainerRequestEvent();
containerAllocator.handle(containerAllocatorEvent);
verify(eventHandler, times(1)).handle(containerAssignedCaptor.capture());
Container container = containerAssignedCaptor.getValue().getContainer();
Resource containerResource = container.getResource();
Assert.assertNotNull(containerResource);
Assert.assertEquals(containerResource.getMemorySize(), 0);
Assert.assertEquals(containerResource.getVirtualCores(), 0);
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method testRMContainerAllocatorResendsRequestsOnRMRestart.
// Step-1 : AM send allocate request for 2 ContainerRequests and 1
// blackListeNode
// Step-2 : 2 containers are allocated by RM.
// Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to
// RM
// Step-4 : On RM restart, AM(does not know RM is restarted) sends
// additional containerRequest(event4) and blacklisted nodes.
// Intern RM send resync command
// Step-5 : On Resync,AM sends all outstanding
// asks,release,blacklistAaddition
// and another containerRequest(event5)
// Step-6 : RM allocates containers i.e event3,event4 and cRequest5
@Test
public void testRMContainerAllocatorResendsRequestsOnRMRestart() throws Exception {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0);
conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MyResourceManager rm1 = new MyResourceManager(conf, memStore);
rm1.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm1.submitApp(1024);
dispatcher.await();
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// Node heartbeat
nm1.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm1.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
Job mockJob = mock(Job.class);
when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
MyContainerAllocator allocator = new MyContainerAllocator(rm1, conf, appAttemptId, mockJob);
// Step-1 : AM send allocate request for 2 ContainerRequests and 1
// blackListeNode
// create the container request
// send MAP request
ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
allocator.sendRequest(event1);
ContainerRequestEvent event2 = createReq(jobId, 2, 2048, new String[] { "h1", "h2" });
allocator.sendRequest(event2);
// Send events to blacklist h2
ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h2", false);
allocator.sendFailure(f1);
// send allocate request and 1 blacklisted nodes
List<TaskAttemptContainerAssignedEvent> assignedContainers = allocator.schedule();
dispatcher.await();
Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
// Why ask is 3, not 4? --> ask from blacklisted node h2 is removed
assertAsksAndReleases(3, 0, rm1);
assertBlacklistAdditionsAndRemovals(1, 0, rm1);
// Node heartbeat
nm1.nodeHeartbeat(true);
dispatcher.await();
// Step-2 : 2 containers are allocated by RM.
assignedContainers = allocator.schedule();
dispatcher.await();
Assert.assertEquals("No of assignments must be 2", 2, assignedContainers.size());
assertAsksAndReleases(0, 0, rm1);
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
assignedContainers = allocator.schedule();
Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
assertAsksAndReleases(3, 0, rm1);
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
// Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to
// RM
// send container request
ContainerRequestEvent event3 = createReq(jobId, 3, 1000, new String[] { "h1" });
allocator.sendRequest(event3);
// send deallocate request
ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
allocator.sendDeallocate(deallocate1);
assignedContainers = allocator.schedule();
Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
assertAsksAndReleases(3, 1, rm1);
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
// Phase-2 start 2nd RM is up
MyResourceManager rm2 = new MyResourceManager(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
allocator.updateSchedulerProxy(rm2);
dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
// NM should be rebooted on heartbeat, even first heartbeat for nm2
NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
// new NM to represent NM re-register
nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
nm1.registerNode();
nm1.nodeHeartbeat(true);
dispatcher.await();
// Step-4 : On RM restart, AM(does not know RM is restarted) sends
// additional containerRequest(event4) and blacklisted nodes.
// Intern RM send resync command
// send deallocate request, release=1
ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
allocator.sendDeallocate(deallocate2);
// Send events to blacklist nodes h3
ContainerFailedEvent f2 = createFailEvent(jobId, 1, "h3", false);
allocator.sendFailure(f2);
ContainerRequestEvent event4 = createReq(jobId, 4, 2000, new String[] { "h1", "h2" });
allocator.sendRequest(event4);
// send allocate request to 2nd RM and get resync command
allocator.schedule();
dispatcher.await();
// Step-5 : On Resync,AM sends all outstanding
// asks,release,blacklistAaddition
// and another containerRequest(event5)
ContainerRequestEvent event5 = createReq(jobId, 5, 3000, new String[] { "h1", "h2", "h3" });
allocator.sendRequest(event5);
// send all outstanding request again.
assignedContainers = allocator.schedule();
dispatcher.await();
assertAsksAndReleases(3, 2, rm2);
assertBlacklistAdditionsAndRemovals(2, 0, rm2);
nm1.nodeHeartbeat(true);
dispatcher.await();
// Step-6 : RM allocates containers i.e event3,event4 and cRequest5
assignedContainers = allocator.schedule();
dispatcher.await();
Assert.assertEquals("Number of container should be 3", 3, assignedContainers.size());
for (TaskAttemptContainerAssignedEvent assig : assignedContainers) {
Assert.assertTrue("Assigned count not correct", "h1".equals(assig.getContainer().getNodeId().getHost()));
}
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method testBlackListedNodesWithSchedulingToThatNode.
@Test
public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
LOG.info("Running testBlackListedNodesWithSchedulingToThatNode");
Configuration conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
MyResourceManager rm = new MyResourceManager(conf);
rm.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm.submitApp(1024);
dispatcher.await();
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
amNodeManager.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
Job mockJob = mock(Job.class);
when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
// add resources to scheduler
MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
dispatcher.await();
LOG.info("Requesting 1 Containers _1 on H1");
// create the container request
ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
allocator.sendRequest(event1);
LOG.info("RM Heartbeat (to send the container requests)");
// this tells the scheduler about the requests
// as nodes are not added, no allocations
List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
LOG.info("h1 Heartbeat (To actually schedule the containers)");
// update resources in scheduler
// Node heartbeat
nodeManager1.nodeHeartbeat(true);
dispatcher.await();
LOG.info("RM Heartbeat (To process the scheduled containers)");
assigned = allocator.schedule();
dispatcher.await();
assertBlacklistAdditionsAndRemovals(0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
LOG.info("Failing container _1 on H1 (should blacklist the node)");
// Send events to blacklist nodes h1 and h2
ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
allocator.sendFailure(f1);
//At this stage, a request should be created for a fast fail map
//Create a FAST_FAIL request for a previously failed map.
ContainerRequestEvent event1f = createReq(jobId, 1, 1024, new String[] { "h1" }, true, false);
allocator.sendRequest(event1f);
//Update the Scheduler with the new requests.
assigned = allocator.schedule();
dispatcher.await();
assertBlacklistAdditionsAndRemovals(1, 0, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
// send another request with different resource and priority
ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1", "h3" });
allocator.sendRequest(event3);
//Allocator is aware of prio:5 container, and prio:20 (h1+h3) container.
//RM is only aware of the prio:5 container
LOG.info("h1 Heartbeat (To actually schedule the containers)");
// update resources in scheduler
// Node heartbeat
nodeManager1.nodeHeartbeat(true);
dispatcher.await();
LOG.info("RM Heartbeat (To process the scheduled containers)");
assigned = allocator.schedule();
dispatcher.await();
assertBlacklistAdditionsAndRemovals(0, 0, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
//RMContainerAllocator gets assigned a p:5 on a blacklisted node.
//Send a release for the p:5 container + another request.
LOG.info("RM Heartbeat (To process the re-scheduled containers)");
assigned = allocator.schedule();
dispatcher.await();
assertBlacklistAdditionsAndRemovals(0, 0, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
//Hearbeat from H3 to schedule on this host.
LOG.info("h3 Heartbeat (To re-schedule the containers)");
// Node heartbeat
nodeManager3.nodeHeartbeat(true);
dispatcher.await();
LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)");
assigned = allocator.schedule();
assertBlacklistAdditionsAndRemovals(0, 0, rm);
dispatcher.await();
// For debugging
for (TaskAttemptContainerAssignedEvent assig : assigned) {
LOG.info(assig.getTaskAttemptID() + " assgined to " + assig.getContainer().getId() + " with priority " + assig.getContainer().getPriority());
}
Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
// validate that all containers are assigned to h3
for (TaskAttemptContainerAssignedEvent assig : assigned) {
Assert.assertEquals("Assigned container " + assig.getContainer().getId() + " host not correct", "h3", assig.getContainer().getNodeId().getHost());
}
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method testUpdatedNodes.
@Test
public void testUpdatedNodes() throws Exception {
Configuration conf = new Configuration();
MyResourceManager rm = new MyResourceManager(conf);
rm.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm.submitApp(1024);
dispatcher.await();
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
amNodeManager.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
Job mockJob = mock(Job.class);
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
// add resources to scheduler
MockNM nm1 = rm.registerNode("h1:1234", 10240);
MockNM nm2 = rm.registerNode("h2:1234", 10240);
dispatcher.await();
// create the map container request
ContainerRequestEvent event = createReq(jobId, 1, 1024, new String[] { "h1" });
allocator.sendRequest(event);
TaskAttemptId attemptId = event.getAttemptID();
TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
Task mockTask = mock(Task.class);
when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);
// this tells the scheduler about the requests
List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
dispatcher.await();
nm1.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
Assert.assertEquals(3, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
allocator.getJobUpdatedNodeEvents().clear();
// get the assignment
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(1, assigned.size());
Assert.assertEquals(nm1.getNodeId(), assigned.get(0).getContainer().getNodeId());
// no updated nodes reported
Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
// mark nodes bad
nm1.nodeHeartbeat(false);
nm2.nodeHeartbeat(false);
dispatcher.await();
// schedule response returns updated nodes
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(0, assigned.size());
// updated nodes are reported
Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
Assert.assertEquals(1, allocator.getTaskAttemptKillEvents().size());
Assert.assertEquals(2, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
Assert.assertEquals(attemptId, allocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
allocator.getJobUpdatedNodeEvents().clear();
allocator.getTaskAttemptKillEvents().clear();
assigned = allocator.schedule();
dispatcher.await();
Assert.assertEquals(0, assigned.size());
// no updated nodes reported
Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestTaskAttempt method testContainerKillWhileRunning.
@Test
public void testContainerKillWhileRunning() throws Exception {
ApplicationId appId = ApplicationId.newInstance(1, 2);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
JobId jobId = MRBuilderUtils.newJobId(appId, 1);
TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
Path jobFile = mock(Path.class);
MockEventHandler eventHandler = new MockEventHandler();
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
JobConf jobConf = new JobConf();
jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
jobConf.setBoolean("fs.file.impl.disable.cache", true);
jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });
AppContext appCtx = mock(AppContext.class);
ClusterInfo clusterInfo = mock(ClusterInfo.class);
Resource resource = mock(Resource.class);
when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
when(resource.getMemorySize()).thenReturn(1024L);
TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(), SystemClock.getInstance(), appCtx);
NodeId nid = NodeId.newInstance("127.0.0.2", 0);
ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
Container container = mock(Container.class);
when(container.getId()).thenReturn(contId);
when(container.getNodeId()).thenReturn(nid);
when(container.getNodeHttpAddress()).thenReturn("localhost:0");
taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE));
taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class)));
taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0));
assertEquals("Task attempt is not in running state", taImpl.getState(), TaskAttemptState.RUNNING);
taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL));
assertFalse("InternalError occurred trying to handle TA_KILL", eventHandler.internalError);
assertEquals("Task should be in KILL_CONTAINER_CLEANUP state", TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, taImpl.getInternalState());
}
Aggregations