Search in sources :

Example 41 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestNodeLabelContainerAllocation method testPreferenceOfNeedyAppsTowardsNodePartitions.

@Test
public void testPreferenceOfNeedyAppsTowardsNodePartitions() throws Exception {
    /**
     * Test case: Submit two application to a queue (app1 first then app2), app1
     * asked for no-label, app2 asked for label=x, when node1 has label=x
     * doing heart beat, app2 will get allocation first, even if app2 submits later
     * than app1
     */
    // set node -> label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y", false)));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("y")));
    // inject node label manager
    MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    // label = y
    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
    // label = <empty>
    MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB);
    // launch an app to queue b1 (label = y), AM container should be launched in nm2
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
    // launch another app to queue b1 (label = y), AM container should be launched in nm2
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // request container and nm1 do heartbeat (nm2 has label=y), note that app1
    // request non-labeled container, and app2 request labeled container, app2
    // will get allocated first even if app1 submitted first.  
    am1.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>());
    am2.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>(), "y");
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
    // Do node heartbeats many times
    for (int i = 0; i < 50; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }
    // App2 will get preference to be allocated on node1, and node1 will be all
    // used by App2.
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
    FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
    // app1 get nothing in nm1 (partition=y)
    checkNumOfContainersInAnAppOnGivenNode(0, nm1.getNodeId(), schedulerApp1);
    checkNumOfContainersInAnAppOnGivenNode(9, nm2.getNodeId(), schedulerApp1);
    // app2 get all resource in nm1 (partition=y)
    checkNumOfContainersInAnAppOnGivenNode(8, nm1.getNodeId(), schedulerApp2);
    checkNumOfContainersInAnAppOnGivenNode(1, nm2.getNodeId(), schedulerApp2);
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 42 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestApplicationLifetimeMonitor method testApplicationLifetimeMonitor.

@Test(timeout = 60000)
public void testApplicationLifetimeMonitor() throws Exception {
    MockRM rm = null;
    try {
        rm = new MockRM(conf);
        rm.start();
        Priority appPriority = Priority.newInstance(0);
        MockNM nm1 = rm.registerNode("127.0.0.1:1234", 16 * 1024);
        Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
        timeouts.put(ApplicationTimeoutType.LIFETIME, 10L);
        RMApp app1 = rm.submitApp(1024, appPriority, timeouts);
        // 20L seconds
        timeouts.put(ApplicationTimeoutType.LIFETIME, 20L);
        RMApp app2 = rm.submitApp(1024, appPriority, timeouts);
        nm1.nodeHeartbeat(true);
        // Send launch Event
        MockAM am1 = rm.sendAMLaunched(app1.getCurrentAppAttempt().getAppAttemptId());
        am1.registerAppAttempt();
        rm.waitForState(app1.getApplicationId(), RMAppState.KILLED);
        Assert.assertTrue("Application killed before lifetime value", (System.currentTimeMillis() - app1.getSubmitTime()) > 10000);
        Map<ApplicationTimeoutType, String> updateTimeout = new HashMap<ApplicationTimeoutType, String>();
        long newLifetime = 10L;
        // update 10L seconds more to timeout
        String formatISO8601 = Times.formatISO8601(System.currentTimeMillis() + newLifetime * 1000);
        updateTimeout.put(ApplicationTimeoutType.LIFETIME, formatISO8601);
        UpdateApplicationTimeoutsRequest request = UpdateApplicationTimeoutsRequest.newInstance(app2.getApplicationId(), updateTimeout);
        Map<ApplicationTimeoutType, Long> applicationTimeouts = app2.getApplicationTimeouts();
        // has old timeout time
        long beforeUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
        // update app2 lifetime to new time i.e now + timeout
        rm.getRMContext().getClientRMService().updateApplicationTimeouts(request);
        applicationTimeouts = app2.getApplicationTimeouts();
        long afterUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
        Assert.assertTrue("Application lifetime value not updated", afterUpdate > beforeUpdate);
        // verify for application report.
        RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
        GetApplicationReportRequest appRequest = recordFactory.newRecordInstance(GetApplicationReportRequest.class);
        appRequest.setApplicationId(app2.getApplicationId());
        Map<ApplicationTimeoutType, ApplicationTimeout> appTimeouts = rm.getRMContext().getClientRMService().getApplicationReport(appRequest).getApplicationReport().getApplicationTimeouts();
        Assert.assertTrue("Application Timeout are empty.", !appTimeouts.isEmpty());
        ApplicationTimeout timeout = appTimeouts.get(ApplicationTimeoutType.LIFETIME);
        Assert.assertEquals("Application timeout string is incorrect.", formatISO8601, timeout.getExpiryTime());
        Assert.assertTrue("Application remaining time is incorrect", timeout.getRemainingTime() > 0);
        rm.waitForState(app2.getApplicationId(), RMAppState.KILLED);
        // verify for app killed with updated lifetime
        Assert.assertTrue("Application killed before lifetime value", app2.getFinishTime() > afterUpdate);
    } finally {
        stopRM(rm);
    }
}
Also used : GetApplicationReportRequest(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest) HashMap(java.util.HashMap) Priority(org.apache.hadoop.yarn.api.records.Priority) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationTimeoutType(org.apache.hadoop.yarn.api.records.ApplicationTimeoutType) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) UpdateApplicationTimeoutsRequest(org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest) RecordFactory(org.apache.hadoop.yarn.factories.RecordFactory) ApplicationTimeout(org.apache.hadoop.yarn.api.records.ApplicationTimeout) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 43 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestApplicationLifetimeMonitor method testApplicationLifetimeOnRMRestart.

@Test(timeout = 180000)
public void testApplicationLifetimeOnRMRestart() throws Exception {
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm1.nodeHeartbeat(true);
    long appLifetime = 60L;
    Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
    timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime);
    RMApp app1 = rm1.submitApp(200, Priority.newInstance(0), timeouts);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // Re-start RM
    MockRM rm2 = new MockRM(conf, memStore);
    // make sure app has been unregistered with old RM else both will trigger
    // Expire event
    rm1.getRMContext().getRMAppLifetimeMonitor().unregisterApp(app1.getApplicationId(), ApplicationTimeoutType.LIFETIME);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // recover app
    RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    nm1.registerNode(Arrays.asList(amContainer, runningContainer), null);
    // Wait for RM to settle down on recovering containers;
    TestWorkPreservingRMRestart.waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
    assertTrue(launchedContainers.contains(amContainer.getContainerId()));
    assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
    // check RMContainers are re-recreated and the container state is correct.
    rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
    // re register attempt to rm2
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.ACCEPTED);
    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    am1.registerAppAttempt();
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.RUNNING);
    // wait for app life time and application to be in killed state.
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.KILLED);
    Assert.assertTrue("Application killed before lifetime value", recoveredApp1.getFinishTime() > (recoveredApp1.getSubmitTime() + appLifetime * 1000));
}
Also used : HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationTimeoutType(org.apache.hadoop.yarn.api.records.ApplicationTimeoutType) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Example 44 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestNMReconnect method testRMNodeStatusAfterReconnect.

@Test(timeout = 10000)
public void testRMNodeStatusAfterReconnect() throws Exception {
    // The node(127.0.0.1:1234) reconnected with RM. When it registered with
    // RM, RM set its lastNodeHeartbeatResponse's id to 0 asynchronously. But
    // the node's heartbeat come before RM succeeded setting the id to 0.
    final DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new MockRM() {

        @Override
        protected Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    rm.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
    nm1.registerNode();
    int i = 0;
    while (i < 3) {
        nm1.nodeHeartbeat(true);
        dispatcher.await();
        i++;
    }
    MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
    nm2.registerNode();
    RMNode rmNode = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm2.nodeHeartbeat(true);
    dispatcher.await();
    Assert.assertEquals("Node is Not in Running state.", NodeState.RUNNING, rmNode.getState());
    rm.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 45 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestNMReconnect method testDecommissioningNodeReconnect.

@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testDecommissioningNodeReconnect() throws Exception {
    MockRM rm = new MockRM();
    rm.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
    nm1.registerNode();
    rm.waitForState(nm1.getNodeId(), NodeState.RUNNING);
    rm.getRMContext().getDispatcher().getEventHandler().handle(new RMNodeEvent(nm1.getNodeId(), RMNodeEventType.GRACEFUL_DECOMMISSION));
    rm.waitForState(nm1.getNodeId(), NodeState.DECOMMISSIONING);
    MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
    RegisterNodeManagerResponse response = nm2.registerNode();
    // not SHUTDOWN
    Assert.assertTrue(response.getNodeAction().equals(NodeAction.NORMAL));
    rm.stop();
}
Also used : MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) RegisterNodeManagerResponse(org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent) Test(org.junit.Test)

Aggregations

MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)224 Test (org.junit.Test)218 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)128 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)127 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)79 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)69 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)48 ClientResponse (com.sun.jersey.api.client.ClientResponse)47 Configuration (org.apache.hadoop.conf.Configuration)47 WebResource (com.sun.jersey.api.client.WebResource)39 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)38 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)37 JSONObject (org.codehaus.jettison.json.JSONObject)37 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)36 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)33 Container (org.apache.hadoop.yarn.api.records.Container)29 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)28 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)23 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)22