Search in sources :

Example 36 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testUnregistrationOnlyIfRegistered.

@Test
public void testUnregistrationOnlyIfRegistered() throws Exception {
    Configuration conf = new Configuration();
    final MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher rmDispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp rmApp = rm.submitApp(1024);
    rmDispatcher.await();
    MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 11264);
    amNodeManager.nodeHeartbeat(true);
    rmDispatcher.await();
    final ApplicationAttemptId appAttemptId = rmApp.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    rmDispatcher.await();
    MRApp mrApp = new MRApp(appAttemptId, ContainerId.newContainerId(appAttemptId, 0), 10, 0, false, this.getClass().getName(), true, 1) {

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }

        protected ContainerAllocator createContainerAllocator(ClientService clientService, AppContext context) {
            return new MyContainerAllocator(rm, appAttemptId, context);
        }

        ;
    };
    mrApp.submit(conf);
    DrainDispatcher amDispatcher = (DrainDispatcher) mrApp.getDispatcher();
    MyContainerAllocator allocator = (MyContainerAllocator) mrApp.getContainerAllocator();
    amDispatcher.await();
    Assert.assertTrue(allocator.isApplicationMasterRegistered());
    mrApp.stop();
    Assert.assertTrue(allocator.isUnregistered());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ClientService(org.apache.hadoop.mapreduce.v2.app.client.ClientService) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) RunningAppContext(org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MRApp(org.apache.hadoop.mapreduce.v2.app.MRApp) Test(org.junit.Test)

Example 37 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.

/**
   * Tests whether scheduled reducers are excluded from headroom while
   * calculating headroom.
   */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // Register nodes to RM.
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer(sone on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom enough to run a mapper if headroom is taken as it is but wont be
    // enough if scheduled reducers resources are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 38 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testBlackListedNodesWithSchedulingToThatNode.

@Test
public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
    LOG.info("Running testBlackListedNodesWithSchedulingToThatNode");
    Configuration conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
    MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
    dispatcher.await();
    LOG.info("Requesting 1 Containers _1 on H1");
    // create the container request
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    LOG.info("RM Heartbeat (to send the container requests)");
    // this tells the scheduler about the requests
    // as nodes are not added, no allocations
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
    LOG.info("Failing container _1 on H1 (should blacklist the node)");
    // Send events to blacklist nodes h1 and h2
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
    allocator.sendFailure(f1);
    //At this stage, a request should be created for a fast fail map
    //Create a FAST_FAIL request for a previously failed map.
    ContainerRequestEvent event1f = createReq(jobId, 1, 1024, new String[] { "h1" }, true, false);
    allocator.sendRequest(event1f);
    //Update the Scheduler with the new requests.
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(1, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // send another request with different resource and priority
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1", "h3" });
    allocator.sendRequest(event3);
    //Allocator is aware of prio:5 container, and prio:20 (h1+h3) container.
    //RM is only aware of the prio:5 container
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    //RMContainerAllocator gets assigned a p:5 on a blacklisted node.
    //Send a release for the p:5 container + another request.
    LOG.info("RM Heartbeat (To process the re-scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    //Hearbeat from H3 to schedule on this host.
    LOG.info("h3 Heartbeat (To re-schedule the containers)");
    // Node heartbeat
    nodeManager3.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)");
    assigned = allocator.schedule();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    dispatcher.await();
    // For debugging
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        LOG.info(assig.getTaskAttemptID() + " assgined to " + assig.getContainer().getId() + " with priority " + assig.getContainer().getPriority());
    }
    Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
    // validate that all containers are assigned to h3
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        Assert.assertEquals("Assigned container " + assig.getContainer().getId() + " host not correct", "h3", assig.getContainer().getNodeId().getHost());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 39 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testAttemptNotFoundCausesRMCommunicatorException.

@Test(expected = RMContainerAllocationException.class)
public void testAttemptNotFoundCausesRMCommunicatorException() throws Exception {
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // Now kill the application
    rm.killApp(app.getApplicationId());
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
    allocator.schedule();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 40 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testUpdatedNodes.

@Test
public void testUpdatedNodes() throws Exception {
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nm1 = rm.registerNode("h1:1234", 10240);
    MockNM nm2 = rm.registerNode("h2:1234", 10240);
    dispatcher.await();
    // create the map container request
    ContainerRequestEvent event = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event);
    TaskAttemptId attemptId = event.getAttemptID();
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
    Task mockTask = mock(Task.class);
    when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
    when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);
    // this tells the scheduler about the requests
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(3, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    allocator.getJobUpdatedNodeEvents().clear();
    // get the assignment
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(1, assigned.size());
    Assert.assertEquals(nm1.getNodeId(), assigned.get(0).getContainer().getNodeId());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
    // mark nodes bad
    nm1.nodeHeartbeat(false);
    nm2.nodeHeartbeat(false);
    dispatcher.await();
    // schedule response returns updated nodes
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // updated nodes are reported
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(1, allocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(2, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    Assert.assertEquals(attemptId, allocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
    allocator.getJobUpdatedNodeEvents().clear();
    allocator.getTaskAttemptKillEvents().clear();
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations

DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)84 Test (org.junit.Test)73 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)54 Configuration (org.apache.hadoop.conf.Configuration)50 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)41 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)35 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)32 Path (org.apache.hadoop.fs.Path)24 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)24 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)23 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 IOException (java.io.IOException)16 ArrayList (java.util.ArrayList)16 DeletionService (org.apache.hadoop.yarn.server.nodemanager.DeletionService)16 LocalizerEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent)15 HashMap (java.util.HashMap)14 LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService)14 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)12 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)12