Search in sources :

Example 91 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRMRestart method testSynchronouslyRenewDTOnRecovery.

// Test Delegation token is renewed synchronously so that recover events
// can be processed before any other external incoming events, specifically
// the ContainerFinished event on NM re-registraton.
@Test(timeout = 20000)
public void testSynchronouslyRenewDTOnRecovery() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    final MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app0 = rm1.submitApp(200);
    final MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
    MockRM rm2 = new MockRM(conf, memStore) {

        @Override
        protected ResourceTrackerService createResourceTrackerService() {
            return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {

                @Override
                protected void serviceStart() throws Exception {
                    // send the container_finished event as soon as the
                    // ResourceTrackerService is started.
                    super.serviceStart();
                    nm1.setResourceTrackerService(getResourceTrackerService());
                    NMContainerStatus status = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
                    nm1.registerNode(Arrays.asList(status), null);
                }
            };
        }
    };
    try {
        // Re-start RM
        rm2.start();
        // wait for the 2nd attempt to be started.
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
        int timeoutSecs = 0;
        while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
            Thread.sleep(200);
        }
        MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
        MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
    } finally {
        rm2.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) Test(org.junit.Test)

Example 92 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRMRestart method testRMStateStoreDispatcherDrainedOnRMStop.

@Test(timeout = 60000)
public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore() {

        volatile boolean wait = true;

        @Override
        public void serviceStop() throws Exception {
            // Unblock app saving request.
            wait = false;
            super.serviceStop();
        }

        @Override
        protected void handleStoreEvent(RMStateStoreEvent event) {
            // Skip if synchronous updation of DTToken
            if (!(event instanceof RMStateStoreAMRMTokenEvent) && !(event instanceof RMStateStoreRMDTEvent) && !(event instanceof RMStateStoreRMDTMasterKeyEvent)) {
                while (wait) ;
            }
            super.handleStoreEvent(event);
        }
    };
    memStore.init(conf);
    // start RM
    final MockRM rm1 = createMockRM(conf, memStore);
    rm1.disableDrainEventsImplicitly();
    rm1.start();
    // create apps.
    final ArrayList<RMApp> appList = new ArrayList<RMApp>();
    final int NUM_APPS = 5;
    for (int i = 0; i < NUM_APPS; i++) {
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false);
        appList.add(app);
        rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
    }
    // all apps's saving request are now enqueued to RMStateStore's dispatcher
    // queue, and will be processed once rm.stop() is called.
    // Nothing exist in state store before stop is called.
    Map<ApplicationId, ApplicationStateData> rmAppState = memStore.getState().getApplicationState();
    Assert.assertTrue(rmAppState.size() == 0);
    // stop rm
    rm1.stop();
    // request on dispatcher.
    for (RMApp app : appList) {
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app.getApplicationSubmissionContext().getApplicationId());
    }
    Assert.assertTrue(rmAppState.size() == NUM_APPS);
}
Also used : RMStateStoreAMRMTokenEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreAMRMTokenEvent) RMStateStoreRMDTMasterKeyEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTMasterKeyEvent) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ArrayList(java.util.ArrayList) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMStateStoreEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) RMStateStoreRMDTEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 93 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRMRestart method testRMRestartKilledApp.

@Test(timeout = 60000)
public void testRMRestartKilledApp() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    // kill the app.
    rm1.killApp(app0.getApplicationId());
    rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);
    rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);
    // killed state is saved.
    ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
    Assert.assertEquals(RMAppState.KILLED, appState.getState());
    Assert.assertEquals(RMAppAttemptState.KILLED, appState.getAttempt(am0.getApplicationAttemptId()).getState());
    String trackingUrl = app0.getCurrentAppAttempt().getOriginalTrackingUrl();
    Assert.assertNotNull(trackingUrl);
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
    rm2.waitForState(app0.getApplicationId(), RMAppState.KILLED);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);
    // no new attempt is created.
    Assert.assertEquals(1, loadedApp0.getAppAttempts().size());
    ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
    Assert.assertEquals(app0.getDiagnostics().toString(), appReport.getDiagnostics());
    Assert.assertEquals(trackingUrl, loadedApp0.getCurrentAppAttempt().getOriginalTrackingUrl());
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 94 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testNodeRemovalDuringAllocate.

@Test(timeout = 60000)
public void testNodeRemovalDuringAllocate() throws Exception {
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // Both node 1 and node 2 will be applicable for scheduling.
    for (int i = 0; i < 10; i++) {
        am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        if (ctxt.getNodeMap().size() == 2) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(2, ctxt.getNodeMap().size());
    // Remove node from scheduler but not from AM Service.
    scheduler.handle(new NodeRemovedSchedulerEvent(rmNode1));
    // After removal of node 1, only 1 node will be applicable for scheduling.
    for (int i = 0; i < 10; i++) {
        try {
            am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        } catch (Exception e) {
            Assert.fail("Allocate request should be handled on node removal");
        }
        if (ctxt.getNodeMap().size() == 1) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(1, ctxt.getNodeMap().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Example 95 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRM method testNMTokenSentForNormalContainer.

// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
@Test(timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
    conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName());
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("h1:1234", 5120);
    RMApp app = rm.submitApp(2000);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    // Call getNewContainerId to increase container Id so that the AM container
    // Id doesn't equal to one.
    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
    cs.getApplicationAttempt(attempt.getAppAttemptId()).getNewContainerId();
    MockAM am = MockRM.launchAM(app, rm, nm1);
    // am container Id not equal to 1.
    Assert.assertTrue(attempt.getMasterContainer().getId().getContainerId() != 1);
    // NMSecretManager doesn't record the node on which the am is allocated.
    Assert.assertFalse(rm.getRMContext().getNMTokenSecretManager().isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
    am.registerAppAttempt();
    rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
    int NUM_CONTAINERS = 1;
    List<Container> containers = new ArrayList<Container>();
    // nmTokens keeps track of all the nmTokens issued in the allocate call.
    List<NMToken> expectedNMTokens = new ArrayList<NMToken>();
    // am1 allocate 1 container on nm1.
    while (true) {
        AllocateResponse response = am.allocate("127.0.0.1", 2000, NUM_CONTAINERS, new ArrayList<ContainerId>());
        nm1.nodeHeartbeat(true);
        containers.addAll(response.getAllocatedContainers());
        expectedNMTokens.addAll(response.getNMTokens());
        if (containers.size() == NUM_CONTAINERS) {
            break;
        }
        Thread.sleep(200);
        System.out.println("Waiting for container to be allocated.");
    }
    NodeId nodeId = expectedNMTokens.get(0).getNodeId();
    // NMToken is sent for the allocated container.
    Assert.assertEquals(nm1.getNodeId(), nodeId);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NMToken(org.apache.hadoop.yarn.api.records.NMToken) ArrayList(java.util.ArrayList) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Aggregations

RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)447 Test (org.junit.Test)350 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)132 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)124 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)116 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)105 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)99 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)97 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)91 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)68 Configuration (org.apache.hadoop.conf.Configuration)66 Container (org.apache.hadoop.yarn.api.records.Container)58 ArrayList (java.util.ArrayList)56 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)53 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)44 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)42 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)41 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)40