
Example 11 with ContainerRequestEvent

Use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent in project hadoop by apache.

From the class RMContainerAllocator, the method handleReduceContainerRequest:

@SuppressWarnings({ "unchecked" })
private void handleReduceContainerRequest(ContainerRequestEvent reqEvent) {
    assert (reqEvent.getAttemptID().getTaskId().getTaskType().equals(TaskType.REDUCE));
    Resource supportedMaxContainerCapability = getMaxContainerCapability();
    JobId jobId = getJob().getID();
    if (reduceResourceRequest.equals(Resources.none())) {
        reduceResourceRequest = reqEvent.getCapability();
        eventHandler.handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent(org.apache.hadoop.mapreduce.TaskType.REDUCE, reduceResourceRequest.getMemorySize())));
        LOG.info("reduceResourceRequest:" + reduceResourceRequest);
    }
    boolean reduceContainerRequestAccepted = true;
    if (reduceResourceRequest.getMemorySize() > supportedMaxContainerCapability.getMemorySize() || reduceResourceRequest.getVirtualCores() > supportedMaxContainerCapability.getVirtualCores()) {
        reduceContainerRequestAccepted = false;
    }
    if (reduceContainerRequestAccepted) {
        // set the resources
        reqEvent.getCapability().setVirtualCores(reduceResourceRequest.getVirtualCores());
        reqEvent.getCapability().setMemorySize(reduceResourceRequest.getMemorySize());
        if (reqEvent.getEarlierAttemptFailed()) {
            //previously failed reducers are added to the front for fail fast
            pendingReduces.addFirst(new ContainerRequest(reqEvent, PRIORITY_REDUCE, reduceNodeLabelExpression));
        } else {
            //reduces are added to pending queue and are slowly ramped up
            pendingReduces.add(new ContainerRequest(reqEvent, PRIORITY_REDUCE, reduceNodeLabelExpression));
        }
    } else {
        String diagMsg = "REDUCE capability required is more than the " + "supported max container capability in the cluster. Killing" + " the Job. reduceResourceRequest: " + reduceResourceRequest + " maxContainerCapability:" + supportedMaxContainerCapability;
        LOG.info(diagMsg);
        eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
        eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    }
}
Also used : NormalizedResourceEvent(org.apache.hadoop.mapreduce.jobhistory.NormalizedResourceEvent) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) JobDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)
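
The accept-or-kill decision above reduces to comparing the requested memory and virtual cores against the cluster's maximum container capability. Below is a minimal sketch of that check using only the public Resource API; the class name and the numbers are made up for illustration and are not part of RMContainerAllocator.

import org.apache.hadoop.yarn.api.records.Resource;

public class ReduceCapabilityCheckSketch {

    // Mirrors the accept/reject test in handleReduceContainerRequest.
    static boolean fitsInCluster(Resource requested, Resource maxCapability) {
        return requested.getMemorySize() <= maxCapability.getMemorySize()
                && requested.getVirtualCores() <= maxCapability.getVirtualCores();
    }

    public static void main(String[] args) {
        Resource max = Resource.newInstance(8192, 8);        // hypothetical cluster maximum
        Resource reduceAsk = Resource.newInstance(3072, 2);  // hypothetical reduce request
        // true: the request would be queued in pendingReduces;
        // false: the allocator would log a diagnostic and kill the job.
        System.out.println(fitsInCluster(reduceAsk, max));
    }
}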

Example 12 with ContainerRequestEvent

Use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent in project hadoop by apache.

From the class TestRMContainerAllocator, the method testRMContainerAllocatorResendsRequestsOnRMRestart:

// Step-1 : AM sends allocate request for 2 ContainerRequests and 1
// blacklisted node
// Step-2 : 2 containers are allocated by RM.
// Step-3 : AM sends 1 containerRequest(event3) and 1 releaseRequest to
// RM
// Step-4 : On RM restart, AM (does not know RM is restarted) sends
// additional containerRequest(event4) and blacklisted nodes.
// In turn, RM sends a resync command
// Step-5 : On resync, AM sends all outstanding
// asks, releases and blacklist additions,
// and another containerRequest(event5)
// Step-6 : RM allocates containers i.e. event3, event4 and cRequest5
@Test
public void testRMContainerAllocatorResendsRequestsOnRMRestart() throws Exception {
    Configuration conf = new Configuration();
    conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
    conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0);
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MyResourceManager rm1 = new MyResourceManager(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm1, conf, appAttemptId, mockJob);
    // Step-1 : AM sends allocate request for 2 ContainerRequests and 1
    // blacklisted node
    // create the container request
    // send MAP request
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 2048, new String[] { "h1", "h2" });
    allocator.sendRequest(event2);
    // Send events to blacklist h2
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h2", false);
    allocator.sendFailure(f1);
    // send allocate request and 1 blacklisted nodes
    List<TaskAttemptContainerAssignedEvent> assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    // Why is the ask 3, not 4? Because the ask for blacklisted node h2 is removed
    assertAsksAndReleases(3, 0, rm1);
    assertBlacklistAdditionsAndRemovals(1, 0, rm1);
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-2 : 2 containers are allocated by RM.
    assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 2", 2, assignedContainers.size());
    assertAsksAndReleases(0, 0, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    assignedContainers = allocator.schedule();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    assertAsksAndReleases(3, 0, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    // Step-3 : AM sends 1 containerRequest(event3) and 1 releaseRequest to
    // RM
    // send container request
    ContainerRequestEvent event3 = createReq(jobId, 3, 1000, new String[] { "h1" });
    allocator.sendRequest(event3);
    // send deallocate request
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    assignedContainers = allocator.schedule();
    Assert.assertEquals("No of assignments must be 0", 0, assignedContainers.size());
    assertAsksAndReleases(3, 1, rm1);
    assertBlacklistAdditionsAndRemovals(0, 0, rm1);
    // Phase-2 start 2nd RM is up
    MyResourceManager rm2 = new MyResourceManager(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    allocator.updateSchedulerProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    // NM should be asked to resync on heartbeat, even the first heartbeat to rm2
    NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    // new NM to represent NM re-register
    nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
    nm1.registerNode();
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-4 : On RM restart, AM (does not know RM is restarted) sends
    // additional containerRequest(event4) and blacklisted nodes.
    // In turn, RM sends a resync command
    // send deallocate request, release=1
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // Send events to blacklist nodes h3
    ContainerFailedEvent f2 = createFailEvent(jobId, 1, "h3", false);
    allocator.sendFailure(f2);
    ContainerRequestEvent event4 = createReq(jobId, 4, 2000, new String[] { "h1", "h2" });
    allocator.sendRequest(event4);
    // send allocate request to 2nd RM and get resync command
    allocator.schedule();
    dispatcher.await();
    // Step-5 : On resync, AM sends all outstanding
    // asks, releases and blacklist additions,
    // and another containerRequest(event5)
    ContainerRequestEvent event5 = createReq(jobId, 5, 3000, new String[] { "h1", "h2", "h3" });
    allocator.sendRequest(event5);
    // send all outstanding request again.
    assignedContainers = allocator.schedule();
    dispatcher.await();
    assertAsksAndReleases(3, 2, rm2);
    assertBlacklistAdditionsAndRemovals(2, 0, rm2);
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    // Step-6 : RM allocates containers i.e. event3, event4 and cRequest5
    assignedContainers = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("Number of container should be 3", 3, assignedContainers.size());
    for (TaskAttemptContainerAssignedEvent assig : assignedContainers) {
        Assert.assertTrue("Assigned count not correct", "h1".equals(assig.getContainer().getNodeId().getHost()));
    }
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
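
The tests above lean on a local createReq helper to fabricate requests. A rough sketch of what such a helper might look like is shown below; the four-argument ContainerRequestEvent constructor (attempt id, capability, hosts, racks) and the MRBuilderUtils factories are assumptions inferred from the surrounding code rather than a quote of the test class.

import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.records.Resource;

class ContainerRequestEventSketch {

    // Build a map-task container request with a host-locality preference.
    static ContainerRequestEvent mapRequest(JobId jobId, int attempt, int memoryMb,
            String[] hosts) {
        TaskId taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.MAP);
        TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, attempt);
        Resource capability = Resource.newInstance(memoryMb, 1);
        // Racks are left empty in this sketch; the real helper may pass a default rack.
        return new ContainerRequestEvent(attemptId, capability, hosts, new String[0]);
    }
}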

Example 13 with ContainerRequestEvent

Use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent in project hadoop by apache.

From the class TestRMContainerAllocator, the method testExcludeSchedReducesFromHeadroom:

/**
   * Tests that the resources of scheduled reducers are excluded from the
   * headroom when calculating the resources available for further tasks.
   */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // Register nodes to RM.
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer (some on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom is enough to run a mapper if taken as it is, but won't be
    // enough once the scheduled reducers' resources are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
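
The headroom adjustment this test exercises can be pictured as simple resource arithmetic: subtract the resources already committed to scheduled reducers before deciding whether another mapper fits. The sketch below uses the org.apache.hadoop.yarn.util.resource.Resources utilities purely to illustrate that arithmetic, with numbers borrowed from the forced limits in the test; it is not the allocator's actual code path.

import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;

class HeadroomDeductionSketch {

    public static void main(String[] args) {
        Resource headroom = Resource.newInstance(1260, 2);   // what the RM reports
        Resource reduceAsk = Resource.newInstance(1024, 1);  // per-reducer request
        int scheduledReduces = 1;

        // Deduct the resources of already-scheduled reducers from the reported headroom.
        Resource adjusted = Resources.subtract(headroom,
                Resources.multiply(reduceAsk, scheduledReduces));

        Resource mapAsk = Resource.newInstance(1024, 1);
        // The map fits against the raw headroom, but not after the deduction.
        System.out.println(Resources.fitsIn(mapAsk, headroom)); // true
        System.out.println(Resources.fitsIn(mapAsk, adjusted)); // false
    }
}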

Example 14 with ContainerRequestEvent

Use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent in project hadoop by apache.

From the class TestRMContainerAllocator, the method testConcurrentTaskLimits:

@Test
public void testConcurrentTaskLimits() throws Exception {
    final int MAP_LIMIT = 3;
    final int REDUCE_LIMIT = 1;
    LOG.info("Running testConcurrentTaskLimits");
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.JOB_RUNNING_MAP_LIMIT, MAP_LIMIT);
    conf.setInt(MRJobConfig.JOB_RUNNING_REDUCE_LIMIT, REDUCE_LIMIT);
    conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0f);
    ApplicationId appId = ApplicationId.newInstance(1, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    final MockScheduler mockScheduler = new MockScheduler(appAttemptId);
    MyContainerAllocator allocator = new MyContainerAllocator(null, conf, appAttemptId, mockJob, SystemClock.getInstance()) {

        @Override
        protected void register() {
        }

        @Override
        protected ApplicationMasterProtocol createSchedulerProxy() {
            return mockScheduler;
        }
    };
    // create some map requests
    ContainerRequestEvent[] reqMapEvents = new ContainerRequestEvent[5];
    for (int i = 0; i < reqMapEvents.length; ++i) {
        reqMapEvents[i] = createReq(jobId, i, 1024, new String[] { "h" + i });
    }
    allocator.sendRequests(Arrays.asList(reqMapEvents));
    // create some reduce requests
    ContainerRequestEvent[] reqReduceEvents = new ContainerRequestEvent[2];
    for (int i = 0; i < reqReduceEvents.length; ++i) {
        reqReduceEvents[i] = createReq(jobId, i, 1024, new String[] {}, false, true);
    }
    allocator.sendRequests(Arrays.asList(reqReduceEvents));
    allocator.schedule();
    // verify all of the host-specific asks were sent plus one for the
    // default rack and one for the ANY request
    Assert.assertEquals(reqMapEvents.length + 2, mockScheduler.lastAsk.size());
    // verify AM is only asking for the map limit overall
    Assert.assertEquals(MAP_LIMIT, mockScheduler.lastAnyAskMap);
    // assign a map task and verify we do not ask for any more maps
    ContainerId cid0 = mockScheduler.assignContainer("h0", false);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(2, mockScheduler.lastAnyAskMap);
    // complete the map task and verify that we ask for one more
    mockScheduler.completeContainer(cid0);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(3, mockScheduler.lastAnyAskMap);
    // assign three more maps and verify we ask for no more maps
    ContainerId cid1 = mockScheduler.assignContainer("h1", false);
    ContainerId cid2 = mockScheduler.assignContainer("h2", false);
    ContainerId cid3 = mockScheduler.assignContainer("h3", false);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskMap);
    // complete two containers and verify we only asked for one more
    // since at that point all maps should be scheduled/completed
    mockScheduler.completeContainer(cid2);
    mockScheduler.completeContainer(cid3);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(1, mockScheduler.lastAnyAskMap);
    // allocate the last container and complete the first one
    // and verify there are no more map asks.
    mockScheduler.completeContainer(cid1);
    ContainerId cid4 = mockScheduler.assignContainer("h4", false);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskMap);
    // complete the last map
    mockScheduler.completeContainer(cid4);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskMap);
    // verify only reduce limit being requested
    Assert.assertEquals(REDUCE_LIMIT, mockScheduler.lastAnyAskReduce);
    // assign a reducer and verify ask goes to zero
    cid0 = mockScheduler.assignContainer("h0", true);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskReduce);
    // complete the reducer and verify we ask for another
    mockScheduler.completeContainer(cid0);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(1, mockScheduler.lastAnyAskReduce);
    // assign a reducer and verify ask goes to zero
    cid0 = mockScheduler.assignContainer("h0", true);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskReduce);
    // complete the reducer and verify no more reducers
    mockScheduler.completeContainer(cid0);
    allocator.schedule();
    allocator.schedule();
    Assert.assertEquals(0, mockScheduler.lastAnyAskReduce);
    allocator.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
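
A back-of-the-envelope model helps explain the asserted lastAnyAskMap values: the allocator appears to cap its ANY ask at the lesser of the pending tasks and the remaining concurrency budget (limit minus tasks currently running). The sketch below models only the test's expectations and is not the RMContainerAllocator implementation.

class ConcurrentLimitModel {

    // ANY ask = min(pending tasks, limit - currently running), floored at zero.
    static int anyAsk(int pending, int running, int limit) {
        return Math.max(0, Math.min(pending, limit - running));
    }

    public static void main(String[] args) {
        int mapLimit = 3;
        System.out.println(anyAsk(5, 0, mapLimit)); // 3: initial ask equals MAP_LIMIT
        System.out.println(anyAsk(4, 1, mapLimit)); // 2: one map assigned
        System.out.println(anyAsk(4, 0, mapLimit)); // 3: that map completed
        System.out.println(anyAsk(1, 3, mapLimit)); // 0: three maps running, limit reached
        System.out.println(anyAsk(1, 1, mapLimit)); // 1: two of them completed
    }
}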

Example 15 with ContainerRequestEvent

Use of org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent in project hadoop by apache.

From the class TestRMContainerAllocator, the method testBlackListedNodesWithSchedulingToThatNode:

@Test
public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
    LOG.info("Running testBlackListedNodesWithSchedulingToThatNode");
    Configuration conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler
    MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
    MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
    dispatcher.await();
    LOG.info("Requesting 1 Containers _1 on H1");
    // create the container request
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    LOG.info("RM Heartbeat (to send the container requests)");
    // this tells the scheduler about the requests
    // as nodes are not added, no allocations
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
    LOG.info("Failing container _1 on H1 (should blacklist the node)");
    // Send events to blacklist nodes h1 and h2
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
    allocator.sendFailure(f1);
    //At this stage, a request should be created for a fast fail map
    //Create a FAST_FAIL request for a previously failed map.
    ContainerRequestEvent event1f = createReq(jobId, 1, 1024, new String[] { "h1" }, true, false);
    allocator.sendRequest(event1f);
    //Update the Scheduler with the new requests.
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(1, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // send another request with different resource and priority
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1", "h3" });
    allocator.sendRequest(event3);
    //Allocator is aware of prio:5 container, and prio:20 (h1+h3) container.
    //RM is only aware of the prio:5 container
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    //RMContainerAllocator gets assigned a p:5 on a blacklisted node.
    //Send a release for the p:5 container + another request.
    LOG.info("RM Heartbeat (To process the re-scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // Heartbeat from h3 to schedule on this host.
    LOG.info("h3 Heartbeat (To re-schedule the containers)");
    // Node heartbeat
    nodeManager3.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)");
    assigned = allocator.schedule();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    dispatcher.await();
    // For debugging
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        LOG.info(assig.getTaskAttemptID() + " assigned to " + assig.getContainer().getId() + " with priority " + assig.getContainer().getPriority());
    }
    Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
    // validate that all containers are assigned to h3
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        Assert.assertEquals("Assigned container " + assig.getContainer().getId() + " host not correct", "h3", assig.getContainer().getNodeId().getHost());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
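
The blacklisting behaviour this test depends on can be summarised as a per-host failure counter: once failures on a host reach MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER (set to 1 above), the AM stops asking for containers on that host. The sketch below is a simplified model of that bookkeeping, not the allocator's actual blacklist handling.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class NodeBlacklistModel {

    private final int maxFailuresPerNode;
    private final Map<String, Integer> failuresPerHost = new HashMap<>();
    private final Set<String> blacklisted = new HashSet<>();

    NodeBlacklistModel(int maxFailuresPerNode) {
        this.maxFailuresPerNode = maxFailuresPerNode;
    }

    // Record a task failure on the given host and blacklist it once the
    // configured threshold is reached.
    void containerFailedOn(String host) {
        int count = failuresPerHost.merge(host, 1, Integer::sum);
        if (count >= maxFailuresPerNode) {
            blacklisted.add(host);
        }
    }

    boolean canScheduleOn(String host) {
        return !blacklisted.contains(host);
    }

    public static void main(String[] args) {
        NodeBlacklistModel model = new NodeBlacklistModel(1);
        model.containerFailedOn("h1");                  // mirrors the failure sent for h1
        System.out.println(model.canScheduleOn("h1"));  // false: new asks avoid h1
        System.out.println(model.canScheduleOn("h3"));  // true: h3 remains usable
    }
}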

Aggregations

JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 20 usages
Test (org.junit.Test): 20 usages
Configuration (org.apache.hadoop.conf.Configuration): 18 usages
Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 18 usages
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 18 usages
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 18 usages
DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 14 usages
MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM): 14 usages
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 14 usages
TaskAttemptContainerAssignedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent): 11 usages
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 5 usages
Resource (org.apache.hadoop.yarn.api.records.Resource): 5 usages
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent): 4 usages
JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 4 usages
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 4 usages
ControlledClock (org.apache.hadoop.yarn.util.ControlledClock): 4 usages
Container (org.apache.hadoop.yarn.api.records.Container): 3 usages
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 3 usages
MapTaskAttemptImpl (org.apache.hadoop.mapred.MapTaskAttemptImpl): 2 usages
NormalizedResourceEvent (org.apache.hadoop.mapreduce.jobhistory.NormalizedResourceEvent): 2 usages