Search in sources :

Example 1 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestFairScheduler method testFairShareAndWeightsInNestedUserQueueRule.

@Test
public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception {
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"parentq\" type=\"parent\">");
    out.println("<minResources>1024mb,0vcores</minResources>");
    out.println("</queue>");
    out.println("<queuePlacementPolicy>");
    out.println("<rule name=\"nestedUserQueue\">");
    out.println("     <rule name=\"specified\" create=\"false\" />");
    out.println("</rule>");
    out.println("<rule name=\"default\" />");
    out.println("</queuePlacementPolicy>");
    out.println("</allocations>");
    out.close();
    RMApp rmApp1 = new MockRMApp(0, 0, RMAppState.NEW);
    RMApp rmApp2 = new MockRMApp(1, 1, RMAppState.NEW);
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    int capacity = 16 * 1024;
    // create node with 16 G
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(capacity), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    // user1,user2 submit their apps to parentq and create user queues
    createSchedulingRequest(10 * 1024, "root.parentq", "user1");
    createSchedulingRequest(10 * 1024, "root.parentq", "user2");
    // user3 submits app in default queue
    createSchedulingRequest(10 * 1024, "root.default", "user3");
    scheduler.update();
    scheduler.getQueueManager().getRootQueue().setSteadyFairShare(scheduler.getClusterResource());
    scheduler.getQueueManager().getRootQueue().recomputeSteadyShares();
    Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager().getLeafQueues();
    for (FSLeafQueue leaf : leafQueues) {
        if (leaf.getName().equals("root.parentq.user1") || leaf.getName().equals("root.parentq.user2")) {
            // assert that the fair share is 1/4th node1's capacity
            assertEquals(capacity / 4, leaf.getFairShare().getMemorySize());
            // assert that the steady fair share is 1/4th node1's capacity
            assertEquals(capacity / 4, leaf.getSteadyFairShare().getMemorySize());
            // assert weights are equal for both the user queues
            assertEquals(1.0, leaf.getWeights().getWeight(ResourceType.MEMORY), 0);
        }
    }
}
Also used : MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) FileWriter(java.io.FileWriter) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 2 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRMWebServicesSchedulerActivities method testAssignWithoutAvailableResource.

@Test
public void testAssignWithoutAvailableResource() throws Exception {
    //Start RM so that it accepts app submissions
    rm.start();
    MockNM nm = new MockNM("127.0.0.1:1234", 1 * 1024, rm.getResourceTrackerService());
    nm.registerNode();
    try {
        RMApp app1 = rm.submitApp(1024, "app1", "user1", null, "b1");
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm);
        am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.UNDEFINED, "127.0.0.1", Resources.createResource(1024), 10), ResourceRequest.newInstance(Priority.UNDEFINED, "/default-rack", Resources.createResource(1024), 10), ResourceRequest.newInstance(Priority.UNDEFINED, "*", Resources.createResource(1024), 10)), null);
        //Get JSON
        WebResource r = resource();
        MultivaluedMapImpl params = new MultivaluedMapImpl();
        params.add("nodeId", "127.0.0.1");
        ClientResponse response = r.path("ws").path("v1").path("cluster").path("scheduler/activities").queryParams(params).accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
        assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8, response.getType().toString());
        JSONObject json = response.getEntity(JSONObject.class);
        nm.nodeHeartbeat(true);
        Thread.sleep(1000);
        //Get JSON
        response = r.path("ws").path("v1").path("cluster").path("scheduler/activities").queryParams(params).accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
        assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8, response.getType().toString());
        json = response.getEntity(JSONObject.class);
        verifyNumberOfAllocations(json, 0);
    } finally {
        rm.stop();
    }
}
Also used : ClientResponse(com.sun.jersey.api.client.ClientResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) JSONObject(org.codehaus.jettison.json.JSONObject) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) WebResource(com.sun.jersey.api.client.WebResource) MultivaluedMapImpl(com.sun.jersey.core.util.MultivaluedMapImpl) Test(org.junit.Test)

Example 3 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestWorkPreservingRMRestart method testDynamicQueueRecovery.

// Test work preserving recovery of apps running under reservation.
// This involves:
// 1. Setting up a dynamic reservable queue,
// 2. Submitting an app to it,
// 3. Failing over RM,
// 4. Validating that the app is recovered post failover,
// 5. Check if all running containers are recovered,
// 6. Verify the scheduler state like attempt info,
// 7. Verify the queue/user metrics for the dynamic reservable queue.
@Test(timeout = 30000)
public void testDynamicQueueRecovery() throws Exception {
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    // 1. Set up dynamic reservable queue.
    Configuration schedulerConf = getSchedulerDynamicConfiguration();
    int containerMemory = 1024;
    Resource containerResource = Resource.newInstance(containerMemory, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(schedulerConf);
    rm1 = new MockRM(schedulerConf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    // 2. Run plan follower to update the added node & then submit app to
    // dynamic queue.
    rm1.getRMContext().getReservationSystem().synchronizePlan(ReservationSystemTestUtil.reservationQ, true);
    RMApp app1 = rm1.submitApp(200, "dynamicQApp", UserGroupInformation.getCurrentUser().getShortUserName(), null, ReservationSystemTestUtil.getReservationQueueName());
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // clear queue metrics
    rm1.clearQueueMetrics(app1);
    // 3. Fail over (restart) RM.
    rm2 = new MockRM(schedulerConf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // 4. Validate app is recovered post failover.
    RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
    NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
    // Wait for RM to settle down on recovering containers.
    waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
    assertTrue(launchedContainers.contains(amContainer.getContainerId()));
    assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
    // 5. Check RMContainers are re-recreated and the container state is
    // correct.
    rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
    SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
    // ********* check scheduler node state.*******
    // 2 running containers.
    Resource usedResources = Resources.multiply(containerResource, 2);
    Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
    assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
    assertTrue(schedulerNode1.isValidContainer(runningContainer.getContainerId()));
    assertFalse(schedulerNode1.isValidContainer(completedContainer.getContainerId()));
    // 2 launched containers, 1 completed container
    assertEquals(2, schedulerNode1.getNumContainers());
    assertEquals(Resources.subtract(nmResource, usedResources), schedulerNode1.getUnallocatedResource());
    assertEquals(usedResources, schedulerNode1.getAllocatedResource());
    Resource availableResources = Resources.subtract(nmResource, usedResources);
    // 6. Verify the scheduler state like attempt info.
    Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>> sa = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication<SchedulerApplicationAttempt> schedulerApp = sa.get(recoveredApp1.getApplicationId());
    // 7. Verify the queue/user metrics for the dynamic reservable queue.
    if (getSchedulerType() == SchedulerType.CAPACITY) {
        checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
    } else {
        checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
    }
    // *********** check scheduler attempt state.********
    SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(amContainer.getContainerId())));
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(runningContainer.getContainerId())));
    assertEquals(schedulerAttempt.getCurrentConsumption(), usedResources);
    // *********** check appSchedulingInfo state ***********
    assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) FairSchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) Resource(org.apache.hadoop.yarn.api.records.Resource) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)

Example 4 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestWorkPreservingRMRestart method testCapacityLeafQueueBecomesParentOnRecovery.

//Test behavior of an app if queue is changed from leaf to parent during
//recovery. Test case does following:
//1. Add an app to QueueB and start the attempt.
//2. Add 2 subqueues(QueueB1 and QueueB2) to QueueB, restart the RM, once with
//   fail fast config as false and once with fail fast as true.
//3. Verify that app was killed if fail fast is false.
//4. Verify that QueueException was thrown if fail fast is true.
@Test(timeout = 30000)
public void testCapacityLeafQueueBecomesParentOnRecovery() throws Exception {
    if (getSchedulerType() != SchedulerType.CAPACITY) {
        return;
    }
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfiguration(csConf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(csConf);
    rm1 = new MockRM(csConf, memStore);
    rm1.start();
    MockNM nm = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
    nm.registerNode();
    // Submit an app to QueueB.
    RMApp app = rm1.submitApp(1024, "app", USER_2, null, B);
    MockRM.launchAndRegisterAM(app, rm1, nm);
    assertEquals(rm1.getApplicationReport(app.getApplicationId()).getYarnApplicationState(), YarnApplicationState.RUNNING);
    // Take a copy of state store so that it can be reset to this state.
    RMState state = memStore.loadState();
    // Change scheduler config with child queues added to QueueB.
    csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfigurationChildOfB(csConf);
    String diags = "Application killed on recovery as it was submitted to " + "queue QueueB which is no longer a leaf queue after restart.";
    verifyAppRecoveryWithWrongQueueConfig(csConf, app, diags, memStore, state);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Test(org.junit.Test)

Example 5 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestWorkPreservingRMRestart method testAppFailedToRenewTokenOnRecovery.

@Test(timeout = 30000)
public void testAppFailedToRenewTokenOnRecovery() throws Exception {
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    UserGroupInformation.setConfiguration(conf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new TestSecurityMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app1 = rm1.submitApp(200);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    MockRM rm2 = new TestSecurityMockRM(conf, memStore) {

        protected DelegationTokenRenewer createDelegationTokenRenewer() {
            return new DelegationTokenRenewer() {

                @Override
                public void addApplicationSync(ApplicationId applicationId, Credentials ts, boolean shouldCancelAtEnd, String user) throws IOException {
                    throw new IOException("Token renew failed !!");
                }
            };
        }
    };
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    rm2.start();
    NMContainerStatus containerStatus = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    nm1.registerNode(Arrays.asList(containerStatus), null);
    // am re-register
    rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    am1.registerAppAttempt(true);
    rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
    // Because the token expired, am could crash.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm2.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) DelegationTokenRenewer(org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) IOException(java.io.IOException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Credentials(org.apache.hadoop.security.Credentials) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) Test(org.junit.Test)

Aggregations

RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)448 Test (org.junit.Test)351 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)132 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)124 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)116 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)105 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)99 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)97 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)91 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)68 Configuration (org.apache.hadoop.conf.Configuration)66 Container (org.apache.hadoop.yarn.api.records.Container)58 ArrayList (java.util.ArrayList)56 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)53 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)44 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)42 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)41 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)40