Search in sources :

Example 46 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testBlackListedNodesWithSchedulingToThatNode.

@Test
public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
    LOG.info("Running testBlackListedNodesWithSchedulingToThatNode");
    Configuration conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
    MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
    dispatcher.await();
    LOG.info("Requesting 1 Containers _1 on H1");
    // create the container request
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    LOG.info("RM Heartbeat (to send the container requests)");
    // this tells the scheduler about the requests
    // as nodes are not added, no allocations
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
    LOG.info("Failing container _1 on H1 (should blacklist the node)");
    // Send events to blacklist nodes h1 and h2
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
    allocator.sendFailure(f1);
    //At this stage, a request should be created for a fast fail map
    //Create a FAST_FAIL request for a previously failed map.
    ContainerRequestEvent event1f = createReq(jobId, 1, 1024, new String[] { "h1" }, true, false);
    allocator.sendRequest(event1f);
    //Update the Scheduler with the new requests.
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(1, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // send another request with different resource and priority
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1", "h3" });
    allocator.sendRequest(event3);
    //Allocator is aware of prio:5 container, and prio:20 (h1+h3) container.
    //RM is only aware of the prio:5 container
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    //RMContainerAllocator gets assigned a p:5 on a blacklisted node.
    //Send a release for the p:5 container + another request.
    LOG.info("RM Heartbeat (To process the re-scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    //Hearbeat from H3 to schedule on this host.
    LOG.info("h3 Heartbeat (To re-schedule the containers)");
    // Node heartbeat
    nodeManager3.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)");
    assigned = allocator.schedule();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    dispatcher.await();
    // For debugging
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        LOG.info(assig.getTaskAttemptID() + " assgined to " + assig.getContainer().getId() + " with priority " + assig.getContainer().getPriority());
    }
    Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
    // validate that all containers are assigned to h3
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        Assert.assertEquals("Assigned container " + assig.getContainer().getId() + " host not correct", "h3", assig.getContainer().getNodeId().getHost());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 47 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testAttemptNotFoundCausesRMCommunicatorException.

@Test(expected = RMContainerAllocationException.class)
public void testAttemptNotFoundCausesRMCommunicatorException() throws Exception {
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // Now kill the application
    rm.killApp(app.getApplicationId());
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
    allocator.schedule();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 48 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testUpdatedNodes.

@Test
public void testUpdatedNodes() throws Exception {
    Configuration conf = new Configuration();
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nm1 = rm.registerNode("h1:1234", 10240);
    MockNM nm2 = rm.registerNode("h2:1234", 10240);
    dispatcher.await();
    // create the map container request
    ContainerRequestEvent event = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event);
    TaskAttemptId attemptId = event.getAttemptID();
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockTaskAttempt.getNodeId()).thenReturn(nm1.getNodeId());
    Task mockTask = mock(Task.class);
    when(mockTask.getAttempt(attemptId)).thenReturn(mockTaskAttempt);
    when(mockJob.getTask(attemptId.getTaskId())).thenReturn(mockTask);
    // this tells the scheduler about the requests
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(3, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    allocator.getJobUpdatedNodeEvents().clear();
    // get the assignment
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(1, assigned.size());
    Assert.assertEquals(nm1.getNodeId(), assigned.get(0).getContainer().getNodeId());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
    // mark nodes bad
    nm1.nodeHeartbeat(false);
    nm2.nodeHeartbeat(false);
    dispatcher.await();
    // schedule response returns updated nodes
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // updated nodes are reported
    Assert.assertEquals(1, allocator.getJobUpdatedNodeEvents().size());
    Assert.assertEquals(1, allocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(2, allocator.getJobUpdatedNodeEvents().get(0).getUpdatedNodes().size());
    Assert.assertEquals(attemptId, allocator.getTaskAttemptKillEvents().get(0).getTaskAttemptID());
    allocator.getJobUpdatedNodeEvents().clear();
    allocator.getTaskAttemptKillEvents().clear();
    assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals(0, assigned.size());
    // no updated nodes reported
    Assert.assertTrue(allocator.getJobUpdatedNodeEvents().isEmpty());
    Assert.assertTrue(allocator.getTaskAttemptKillEvents().isEmpty());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 49 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestRMContainerAllocator method testCompletedTasksRecalculateSchedule.

@Test
public void testCompletedTasksRecalculateSchedule() throws Exception {
    LOG.info("Running testCompletedTasksRecalculateSchedule");
    Configuration conf = new Configuration();
    final MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    // Make a node to register so as to launch the AM.
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job job = mock(Job.class);
    when(job.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    doReturn(10).when(job).getTotalMaps();
    doReturn(10).when(job).getTotalReduces();
    doReturn(0).when(job).getCompletedMaps();
    RecalculateContainerAllocator allocator = new RecalculateContainerAllocator(rm, conf, appAttemptId, job);
    allocator.schedule();
    allocator.recalculatedReduceSchedule = false;
    allocator.schedule();
    Assert.assertFalse("Unexpected recalculate of reduce schedule", allocator.recalculatedReduceSchedule);
    doReturn(1).when(job).getCompletedMaps();
    allocator.schedule();
    Assert.assertTrue("Expected recalculate of reduce schedule", allocator.recalculatedReduceSchedule);
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 50 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class ACLsTestBase method setup.

@Before
public void setup() throws InterruptedException, IOException {
    conf = createConfiguration();
    rpc = YarnRPC.create(conf);
    rmAddress = conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
    AccessControlList adminACL = new AccessControlList("");
    conf.set(YarnConfiguration.YARN_ADMIN_ACL, adminACL.getAclString());
    conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, 10);
    resourceManager = new MockRM(conf) {

        protected ClientRMService createClientRMService() {
            return new ClientRMService(getRMContext(), this.scheduler, this.rmAppManager, this.applicationACLsManager, this.queueACLsManager, getRMContext().getRMDelegationTokenSecretManager());
        }

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }

        @Override
        protected void doSecureLogin() throws IOException {
        }
    };
    new Thread() {

        public void run() {
            resourceManager.start();
        }

        ;
    }.start();
    int waitCount = 0;
    while (resourceManager.getServiceState() == STATE.INITED && waitCount++ < 60) {
        LOG.info("Waiting for RM to start...");
        Thread.sleep(1500);
    }
    if (resourceManager.getServiceState() != STATE.STARTED) {
        // RM could have failed.
        throw new IOException("ResourceManager failed to start. Final state is " + resourceManager.getServiceState());
    }
}
Also used : AccessControlList(org.apache.hadoop.security.authorize.AccessControlList) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) IOException(java.io.IOException) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) Before(org.junit.Before)

Aggregations

DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)84 Test (org.junit.Test)73 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)54 Configuration (org.apache.hadoop.conf.Configuration)50 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)41 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)35 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)32 Path (org.apache.hadoop.fs.Path)24 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)24 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)23 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 IOException (java.io.IOException)16 ArrayList (java.util.ArrayList)16 DeletionService (org.apache.hadoop.yarn.server.nodemanager.DeletionService)16 LocalizerEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent)15 HashMap (java.util.HashMap)14 LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService)14 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)12 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)12