Search in sources :

Example 1 with RMContainerAllocator

use of org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator in project hadoop by apache.

the class TestRMContainerAllocator method testBlackListedNodesWithSchedulingToThatNode.

/**
 * Exercises node blacklisting when requests still name the failed node.
 *
 * Flow, as driven by the statements below: a container is assigned on h1,
 * a failure for that attempt on h1 is reported (after which one blacklist
 * addition is asserted), further requests naming h1 (and h1 + h3) are sent,
 * and the test finally asserts that both outstanding requests are assigned
 * on h3 only.
 *
 * NOTE(review): the configuration presumably makes a single task failure
 * enough to blacklist a node and disables the "ignore blacklisting"
 * threshold; confirm against the MRJobConfig key documentation.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
    LOG.info("Running testBlackListedNodesWithSchedulingToThatNode");
    Configuration conf = new Configuration();
    // Blacklisting on, failure limit per tracker = 1, ignore-threshold = -1.
    conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
    conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
    conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    // The test relies on the RM's dispatcher being a DrainDispatcher so that
    // await() can be called after each step.
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    // Register a node for the AM and heartbeat it before the AM launch is sent.
    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    // Mocked job whose report says it is RUNNING.
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    // add resources to scheduler: two worker nodes, h1 and h3
    MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
    MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
    dispatcher.await();
    LOG.info("Requesting 1 Containers _1 on H1");
    // create the container request (task 1, 1024 memory, host h1)
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    LOG.info("RM Heartbeat (to send the container requests)");
    // this tells the scheduler about the requests;
    // h1 and h3 are registered but have not heartbeated yet, and the
    // assertion below expects no allocations at this point
    List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
    dispatcher.await();
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    // Nothing has failed yet: no blacklist changes, one assignment.
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
    LOG.info("Failing container _1 on H1 (should blacklist the node)");
    // Send a failure event for task 1 on h1 (the only node failed in this test)
    ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
    allocator.sendFailure(f1);
    // At this stage, a request should be created for a fast fail map.
    // Create a FAST_FAIL request for a previously failed map.
    ContainerRequestEvent event1f = createReq(jobId, 1, 1024, new String[] { "h1" }, true, false);
    allocator.sendRequest(event1f);
    // Update the Scheduler with the new requests.
    assigned = allocator.schedule();
    dispatcher.await();
    // Exactly one blacklist addition is expected, and no assignment.
    assertBlacklistAdditionsAndRemovals(1, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // send another request with different resource and priority
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1", "h3" });
    allocator.sendRequest(event3);
    // Allocator is aware of prio:5 container, and prio:20 (h1+h3) container.
    // RM is only aware of the prio:5 container
    LOG.info("h1 Heartbeat (To actually schedule the containers)");
    // update resources in scheduler
    // Node heartbeat
    nodeManager1.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    // No further blacklist changes, and still nothing handed to the tasks.
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // RMContainerAllocator gets assigned a p:5 on a blacklisted node.
    // Send a release for the p:5 container + another request.
    LOG.info("RM Heartbeat (To process the re-scheduled containers)");
    assigned = allocator.schedule();
    dispatcher.await();
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
    // Heartbeat from H3 to schedule on this host.
    LOG.info("h3 Heartbeat (To re-schedule the containers)");
    // Node heartbeat
    nodeManager3.nodeHeartbeat(true);
    dispatcher.await();
    LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)");
    assigned = allocator.schedule();
    // NOTE(review): unlike the earlier steps, the blacklist assertion here
    // runs before dispatcher.await() -- confirm the ordering is intentional.
    assertBlacklistAdditionsAndRemovals(0, 0, rm);
    dispatcher.await();
    // For debugging
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        LOG.info(assig.getTaskAttemptID() + " assgined to " + assig.getContainer().getId() + " with priority " + assig.getContainer().getPriority());
    }
    // Both outstanding requests (tasks 1 and 3) are expected to be assigned now.
    Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
    // validate that all containers are assigned to h3
    for (TaskAttemptContainerAssignedEvent assig : assigned) {
        Assert.assertEquals("Assigned container " + assig.getContainer().getId() + " host not correct", "h3", assig.getContainer().getNodeId().getHost());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 2 with RMContainerAllocator

use of org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator in project hadoop by apache.

the class TestRMContainerAllocator method testUnsupportedReduceContainerRequirement.

/**
 * Sends a reduce container request whose memory is 10 above the maximum
 * container capability reported by the allocator, schedules it, and asserts
 * that the mock scheduler's {@code lastAnyAskReduce} is still 0.
 *
 * @throws Exception if the allocator or scheduler interaction fails
 */
@Test
public void testUnsupportedReduceContainerRequirement() throws Exception {
    final Resource capabilityLimit = Resource.newInstance(1, 1);
    final ApplicationId applicationId = ApplicationId.newInstance(1, 1);
    final ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 1);
    final JobId jobId = MRBuilderUtils.newJobId(attemptId.getApplicationId(), 0);
    final MockScheduler scheduler = new MockScheduler(attemptId);
    final Configuration configuration = new Configuration();
    // Allocator wired to the mock scheduler: registration is a no-op and the
    // maximum container capability is pinned to capabilityLimit.
    final MyContainerAllocator allocator = new MyContainerAllocator(null, configuration, attemptId, mock(Job.class), SystemClock.getInstance()) {

        @Override
        protected void register() {
        }

        @Override
        protected ApplicationMasterProtocol createSchedulerProxy() {
            return scheduler;
        }

        @Override
        protected Resource getMaxContainerCapability() {
            return capabilityLimit;
        }
    };

    // Ask for more memory than the limit allows; vcores stay at the limit.
    final int oversizedMemory = (int) (capabilityLimit.getMemorySize() + 10);
    final int vcores = capabilityLimit.getVirtualCores();
    ContainerRequestEvent oversizedReduceRequest = createReq(jobId, 0, oversizedMemory, vcores, new String[0], false, true);
    allocator.sendRequests(Arrays.asList(oversizedReduceRequest));

    // Reducer container requests are added to the pending queue upon request,
    // so schedule all reducers here to observe whether the reducer requests
    // are accepted by RMContainerAllocator on the RM side.
    allocator.scheduleAllReduces();
    allocator.schedule();

    Assert.assertEquals(0, scheduler.lastAnyAskReduce);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 3 with RMContainerAllocator

use of org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator in project hadoop by apache.

the class JobImpl method actOnUnusableNode.

/**
 * Kills the succeeded map task attempts recorded for an unusable node.
 *
 * Nothing happens unless the job's internal state is RUNNING and not all
 * reducers are complete, and the node has recorded succeeded attempts.
 *
 * @param nodeId the node that became unusable
 * @param nodeState the reported node state (not read by this method)
 */
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) {
    // Only relevant while the job is running with reducers still incomplete.
    if (getInternalState() != JobStateInternal.RUNNING || allReducersComplete()) {
        return;
    }
    List<TaskAttemptId> succeededOnNode = nodesToSucceededTaskAttempts.get(nodeId);
    if (succeededOnNode == null) {
        return;
    }
    String mesg = "TaskAttempt killed because it ran on unusable node " + nodeId;
    for (TaskAttemptId attemptId : succeededOnNode) {
        // reschedule only map tasks because their outputs may be unusable
        if (TaskType.MAP != attemptId.getTaskId().getTaskType()) {
            continue;
        }
        LOG.info(mesg + ". AttemptId:" + attemptId);
        // Kill the attempt and indicate that the next map attempt should be
        // rescheduled (i.e. considered as a fast fail map).
        eventHandler.handle(new TaskAttemptKillEvent(attemptId, mesg, true));
    }
    // Task attempts currently running on unusable nodes are handled in
    // RMContainerAllocator.
}
Also used : TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptKillEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent)

Example 4 with RMContainerAllocator

use of org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator in project hadoop by apache.

the class MRAppBenchmark method benchmark1.

/**
 * Runs an MRApp whose container allocator talks to an in-process stub
 * scheduler. For every "any location" ask the stub immediately returns the
 * requested number of containers on synthetic hosts.
 *
 * @throws Exception if the benchmark run fails
 */
@Test
public void benchmark1() throws Exception {
    // Adjust for benchmarking. Start with thousands.
    int mapCount = 100;
    int reduceCount = 0;
    System.out.println("Running benchmark with maps:" + mapCount + " reduces:" + reduceCount);
    String testName = this.getClass().getName();
    MRApp app = new MRApp(mapCount, reduceCount, true, testName, true) {

        @Override
        protected ContainerAllocator createContainerAllocator(ClientService clientService, AppContext context) {
            AMPreemptionPolicy preemptionPolicy = new NoopAMPreemptionPolicy();
            return new RMContainerAllocator(clientService, context, preemptionPolicy) {

                @Override
                protected ApplicationMasterProtocol createSchedulerProxy() {
                    // Stub scheduler: answers every call locally.
                    return new ApplicationMasterProtocol() {

                        @Override
                        public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws IOException {
                            RegisterApplicationMasterResponse registered = Records.newRecord(RegisterApplicationMasterResponse.class);
                            registered.setMaximumResourceCapability(Resource.newInstance(10240, 1));
                            return registered;
                        }

                        @Override
                        public FinishApplicationMasterResponse finishApplicationMaster(FinishApplicationMasterRequest request) throws IOException {
                            return Records.newRecord(FinishApplicationMasterResponse.class);
                        }

                        @Override
                        public AllocateResponse allocate(AllocateRequest request) throws IOException {
                            AllocateResponse allocation = Records.newRecord(AllocateResponse.class);
                            List<Container> granted = new ArrayList<>();
                            for (ResourceRequest ask : request.getAskList()) {
                                // Only "any location" asks are served; the rest are skipped.
                                if (ResourceRequest.isAnyLocation(ask.getResourceName())) {
                                    int wanted = ask.getNumContainers();
                                    for (int offset = 0; offset < wanted; offset++) {
                                        // Container id is derived from the response id plus the loop offset.
                                        ContainerId containerId = ContainerId.newContainerId(getContext().getApplicationAttemptId(), request.getResponseId() + offset);
                                        String host = "host" + containerId.getContainerId();
                                        NodeId nodeId = NodeId.newInstance(host, 2345);
                                        granted.add(Container.newInstance(containerId, nodeId, host + ":5678", ask.getCapability(), ask.getPriority(), null));
                                    }
                                }
                            }
                            allocation.setAllocatedContainers(granted);
                            allocation.setResponseId(request.getResponseId() + 1);
                            allocation.setNumClusterNodes(350);
                            return allocation;
                        }
                    };
                }
            };
        }
    };
    run(app);
}
Also used : ClientService(org.apache.hadoop.mapreduce.v2.app.client.ClientService) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) NoopAMPreemptionPolicy(org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) FinishApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse) IOException(java.io.IOException) RMContainerAllocator(org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator) ContainerAllocator(org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator) FinishApplicationMasterRequest(org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) ArrayList(java.util.ArrayList) List(java.util.List) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) NoopAMPreemptionPolicy(org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy) AMPreemptionPolicy(org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy) RegisterApplicationMasterRequest(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest) RMContainerAllocator(org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator) Test(org.junit.Test)

Example 5 with RMContainerAllocator

use of org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator in project hadoop by apache.

the class TestRMContainerAllocator method testHandlingFinishedContainers.

/**
 * MAPREDUCE-6771. Checks the order of the events RMContainerAllocator emits
 * while processing a finished container: the event handler must first see a
 * TaskAttemptDiagnosticsUpdateEvent, then a TaskAttemptEvent, and nothing
 * else afterwards.
 */
@Test
public void testHandlingFinishedContainers() {
    EventHandler handler = mock(EventHandler.class);

    // Mocked application context supplying the handler, cluster info and clock.
    AppContext appContext = mock(RunningAppContext.class);
    when(appContext.getEventHandler()).thenReturn(handler);
    when(appContext.getClusterInfo()).thenReturn(new ClusterInfo(Resource.newInstance(10240, 1)));
    when(appContext.getClock()).thenReturn(new ControlledClock());

    AMPreemptionPolicy preemptionPolicy = mock(AMPreemptionPolicy.class);
    RMContainerAllocator allocator = new RMContainerAllocatorForFinishedContainer(null, appContext, preemptionPolicy);

    // Report a COMPLETE container with empty diagnostics and exit status 0.
    ContainerId containerId = mock(ContainerId.class);
    ContainerStatus completed = ContainerStatus.newInstance(containerId, ContainerState.COMPLETE, "", 0);
    allocator.processFinishedContainer(completed);

    InOrder ordered = inOrder(handler);
    ordered.verify(handler).handle(isA(TaskAttemptDiagnosticsUpdateEvent.class));
    ordered.verify(handler).handle(isA(TaskAttemptEvent.class));
    ordered.verifyNoMoreInteractions();
}
Also used : ClusterInfo(org.apache.hadoop.mapreduce.v2.app.ClusterInfo) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) InOrder(org.mockito.InOrder) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskAttemptDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent) RunningAppContext(org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) EventHandler(org.apache.hadoop.yarn.event.EventHandler) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) NoopAMPreemptionPolicy(org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy) AMPreemptionPolicy(org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy) ControlledClock(org.apache.hadoop.yarn.util.ControlledClock) Test(org.junit.Test)

Aggregations

Test (org.junit.Test) 6, NoopAMPreemptionPolicy (org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy) 4, Configuration (org.apache.hadoop.conf.Configuration) 3, AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext) 3, RunningAppContext (org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext) 3, ClientService (org.apache.hadoop.mapreduce.v2.app.client.ClientService) 3, ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId) 3, ContainerId (org.apache.hadoop.yarn.api.records.ContainerId) 3, YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 3, JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId) 2, TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) 2, Job (org.apache.hadoop.mapreduce.v2.app.job.Job) 2, TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) 2, AMPreemptionPolicy (org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy) 2, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 2, ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus) 2, ControlledClock (org.apache.hadoop.yarn.util.ControlledClock) 2, IOException (java.io.IOException) 1, ArrayList (java.util.ArrayList) 1, List (java.util.List) 1