Search in sources :

Example 1 with ContainerAllocatorEvent

use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent in project hadoop by apache.

the class TestRMContainerAllocator method testUpdateAskOnRampDownAllReduces.

@Test
public void testUpdateAskOnRampDownAllReduces() throws Exception {
    LOG.info("Running testUpdateAskOnRampDownAllReduces");
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // Use a controlled clock to advance time for test.
    ControlledClock clock = (ControlledClock) allocator.getContext().getClock();
    clock.setTime(System.currentTimeMillis());
    // Register nodes to RM.
    MockNM nodeManager = rm.registerNode("h1:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer(sone on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h2" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Advance clock so that maps can be considered as hanging.
    clock.setTime(System.currentTimeMillis() + 500000L);
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // One map is assigned.
    Assert.assertEquals(1, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate);
    // Now one reducer should be scheduled and one should be pending.
    Assert.assertEquals(1, allocator.getScheduledRequests().reduces.size());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
    // No map should be assigned and one should be scheduled.
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    Assert.assertEquals(6, allocator.getAsk().size());
    for (ResourceRequest req : allocator.getAsk()) {
        boolean isReduce = req.getPriority().equals(RMContainerAllocator.PRIORITY_REDUCE);
        if (isReduce) {
            // 1 reducer each asked on h2, * and default-rack
            Assert.assertTrue((req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack") || req.getResourceName().equals("h2")) && req.getNumContainers() == 1);
        } else {
            //map
            // 0 mappers asked on h1 and 1 each on * and default-rack
            Assert.assertTrue(((req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack")) && req.getNumContainers() == 1) || (req.getResourceName().equals("h1") && req.getNumContainers() == 0));
        }
    }
    // On next allocate request to scheduler, headroom reported will be 0.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(0, 0));
    allocator.schedule();
    dispatcher.await();
    // After allocate response from scheduler, all scheduled reduces are ramped
    // down and move to pending. 3 asks are also updated with 0 containers to
    // indicate ramping down of reduces to scheduler.
    Assert.assertEquals(0, allocator.getScheduledRequests().reduces.size());
    Assert.assertEquals(2, allocator.getNumOfPendingReduces());
    Assert.assertEquals(3, allocator.getAsk().size());
    for (ResourceRequest req : allocator.getAsk()) {
        Assert.assertEquals(RMContainerAllocator.PRIORITY_REDUCE, req.getPriority());
        Assert.assertTrue(req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack") || req.getResourceName().equals("h2"));
        Assert.assertEquals(Resource.newInstance(1024, 1), req.getCapability());
        Assert.assertEquals(0, req.getNumContainers());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ControlledClock(org.apache.hadoop.yarn.util.ControlledClock) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 2 with ContainerAllocatorEvent

use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent in project hadoop by apache.

the class TestRMContainerAllocator method testAvoidAskMoreReducersWhenReducerPreemptionIsRequired.

@Test
public void testAvoidAskMoreReducersWhenReducerPreemptionIsRequired() throws Exception {
    LOG.info("Running testAvoidAskMoreReducersWhenReducerPreemptionIsRequired");
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // Use a controlled clock to advance time for test.
    ControlledClock clock = (ControlledClock) allocator.getContext().getClock();
    clock.setTime(System.currentTimeMillis());
    // Register nodes to RM.
    MockNM nodeManager = rm.registerNode("h1:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer(sone on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h2" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Advance clock so that maps can be considered as hanging.
    clock.setTime(System.currentTimeMillis() + 500000L);
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // One map is assigned.
    Assert.assertEquals(1, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate);
    // Now one reducer should be scheduled and one should be pending.
    Assert.assertEquals(1, allocator.getScheduledRequests().reduces.size());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
    // No map should be assigned and one should be scheduled.
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    Assert.assertEquals(6, allocator.getAsk().size());
    for (ResourceRequest req : allocator.getAsk()) {
        boolean isReduce = req.getPriority().equals(RMContainerAllocator.PRIORITY_REDUCE);
        if (isReduce) {
            // 1 reducer each asked on h2, * and default-rack
            Assert.assertTrue((req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack") || req.getResourceName().equals("h2")) && req.getNumContainers() == 1);
        } else {
            //map
            // 0 mappers asked on h1 and 1 each on * and default-rack
            Assert.assertTrue(((req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack")) && req.getNumContainers() == 1) || (req.getResourceName().equals("h1") && req.getNumContainers() == 0));
        }
    }
    clock.setTime(System.currentTimeMillis() + 500000L + 10 * 60 * 1000);
    // On next allocate request to scheduler, headroom reported will be 2048.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 0));
    allocator.schedule();
    dispatcher.await();
    // After allocate response from scheduler, all scheduled reduces are ramped
    // down and move to pending. 3 asks are also updated with 0 containers to
    // indicate ramping down of reduces to scheduler.
    Assert.assertEquals(0, allocator.getScheduledRequests().reduces.size());
    Assert.assertEquals(2, allocator.getNumOfPendingReduces());
    Assert.assertEquals(3, allocator.getAsk().size());
    for (ResourceRequest req : allocator.getAsk()) {
        Assert.assertEquals(RMContainerAllocator.PRIORITY_REDUCE, req.getPriority());
        Assert.assertTrue(req.getResourceName().equals("*") || req.getResourceName().equals("/default-rack") || req.getResourceName().equals("h2"));
        Assert.assertEquals(Resource.newInstance(1024, 1), req.getCapability());
        Assert.assertEquals(0, req.getNumContainers());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ControlledClock(org.apache.hadoop.yarn.util.ControlledClock) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 3 with ContainerAllocatorEvent

use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent in project hadoop by apache.

the class TestLocalContainerAllocator method testAllocatedContainerResourceIsNotNull.

@Test
public void testAllocatedContainerResourceIsNotNull() {
    ArgumentCaptor<TaskAttemptContainerAssignedEvent> containerAssignedCaptor = ArgumentCaptor.forClass(TaskAttemptContainerAssignedEvent.class);
    @SuppressWarnings("unchecked") EventHandler<Event> eventHandler = mock(EventHandler.class);
    AppContext context = mock(AppContext.class);
    when(context.getEventHandler()).thenReturn(eventHandler);
    ContainerId containerId = ContainerId.fromString("container_1427562107907_0002_01_000001");
    LocalContainerAllocator containerAllocator = new LocalContainerAllocator(mock(ClientService.class), context, "localhost", -1, -1, containerId);
    ContainerAllocatorEvent containerAllocatorEvent = createContainerRequestEvent();
    containerAllocator.handle(containerAllocatorEvent);
    verify(eventHandler, times(1)).handle(containerAssignedCaptor.capture());
    Container container = containerAssignedCaptor.getValue().getContainer();
    Resource containerResource = container.getResource();
    Assert.assertNotNull(containerResource);
    Assert.assertEquals(containerResource.getMemorySize(), 0);
    Assert.assertEquals(containerResource.getVirtualCores(), 0);
}
Also used : UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ClientService(org.apache.hadoop.mapreduce.v2.app.client.ClientService) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) ContainerAllocatorEvent(org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) Event(org.apache.hadoop.yarn.event.Event) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) ContainerAllocatorEvent(org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) Test(org.junit.Test)

Example 4 with ContainerAllocatorEvent

use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent in project hadoop by apache.

the class TestRMContainerAllocator method testRMContainerAllocatorResendsRequestsOnRMRestart.

// Step-1 : AM send allocate request for 2 ContainerRequests and 1
// blackListeNode
// Step-2 : 2 containers are allocated by RM.
// Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to
// RM
// Step-4 : On RM restart, AM(does not know RM is restarted) sends
// additional containerRequest(event4) and blacklisted nodes.
// Intern RM send resync command
// Step-5 : On Resync,AM sends all outstanding
// asks,release,blacklistAaddition
// and another containerRequest(event5)
// Step-6 : RM allocates containers i.e event3,event4 and cRequest5
@Test
public void testRMContainerAllocatorResendsRequestsOnRMRestart() throws Exception {
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
    conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0);
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MyResourceManager rm1 = new MyResourceManager(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm1, conf, appAttemptId, mockJob);
    // Step-1 : AM send allocate request for 2 ContainerRequests and 1
    // blackListeNode
    // create the container request
    // send MAP request
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 2048, new String[] { "h1", "h2" });
    allocator.sendRequest(event2);
    // Send events to blacklist h2
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h2", false);
    allocator.sendFailure(f1);
    // send allocate request and 1 blacklisted nodes
    List<TaskAttemptContainerAssignedEvent> assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    // Why ask is 3, not 4? --> ask from blacklisted node h2 is removed
    assertAsksAndReleases(3, 0, rm1);
    assertBlacklistAdditionsAndRemovals(1, 0, rm1);
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-2 : 2 containers are allocated by RM.
    assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 2", 2, assignedContainers.size());
    assertAsksAndReleases(0, 0, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    assignedContainers = allocator.schedule();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    assertAsksAndReleases(3, 0, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    // Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to
    // RM
    // send container request
    ContainerRequestEvent event3 = createReq(jobId, 3, 1000, new String[] { "h1" });
    allocator.sendRequest(event3);
    // send deallocate request
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    assignedContainers = allocator.schedule();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    assertAsksAndReleases(3, 1, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    // Phase-2 start 2nd RM is up
    MyResourceManager rm2 = new MyResourceManager(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    allocator.updateSchedulerProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    // new NM to represent NM re-register
    nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
    nm1.registerNode();
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-4 : On RM restart, AM(does not know RM is restarted) sends
    // additional containerRequest(event4) and blacklisted nodes.
    // Intern RM send resync command
    // send deallocate request, release=1
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // Send events to blacklist nodes h3
    ContainerFailedEvent f2 = createFailEvent(jobId, 1, "h3", false);
    allocator.sendFailure(f2);
    ContainerRequestEvent event4 = createReq(jobId, 4, 2000, new String[] { "h1", "h2" });
    allocator.sendRequest(event4);
    // send allocate request to 2nd RM and get resync command
    allocator.schedule();
    dispatcher.await();
    // Step-5 : On Resync,AM sends all outstanding
    // asks,release,blacklistAaddition
    // and another containerRequest(event5)
    ContainerRequestEvent event5 = createReq(jobId, 5, 3000, new String[] { "h1", "h2", "h3" });
    allocator.sendRequest(event5);
    // send all outstanding request again.
    assignedContainers = allocator.schedule();
    dispatcher.await();
    assertAsksAndReleases(3, 2, rm2);
    assertBlacklistAdditionsAndRemovals(2, 0, rm2);
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-6 : RM allocates containers i.e event3,event4 and cRequest5
    assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("Number of container should be 3", 3, assignedContainers.size());
    for (TaskAttemptContainerAssignedEvent assig : assignedContainers) {
        Assert.assertTrue("Assigned count not correct", "h1".equals(assig.getContainer().getNodeId().getHost()));
    }
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 5 with ContainerAllocatorEvent

use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent in project hadoop by apache.

the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.

/**
   * Tests whether scheduled reducers are excluded from headroom while
   * calculating headroom.
   */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // Register nodes to RM.
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer(sone on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom enough to run a mapper if headroom is taken as it is but wont be
    // enough if scheduled reducers resources are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)5 Configuration (org.apache.hadoop.conf.Configuration)4 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)4 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)4 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)4 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)4 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)4 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)4 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)4 TaskAttemptContainerAssignedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent)3 TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId)2 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)2 ContainerAllocatorEvent (org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent)2 Container (org.apache.hadoop.yarn.api.records.Container)2 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)2 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)2 ControlledClock (org.apache.hadoop.yarn.util.ControlledClock)2 AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext)1 ClientService (org.apache.hadoop.mapreduce.v2.app.client.ClientService)1 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)1