Search in sources :

Example 71 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

From the class TestKillApplicationWithRMHA, the method testKillAppWhenFailoverHappensAtNewState:

/**
 * Checks that a kill request issued after a failover is rejected with an
 * {@link ApplicationNotFoundException} when the application was still in the
 * NEW state before the failover.
 */
@Test(timeout = 20000)
public void testKillAppWhenFailoverHappensAtNewState() throws Exception {
    // Start the RMs with a customized RMAppManager: while the application is
    // being submitted its RMAppState always stays NEW, so the
    // ApplicationState is never saved in the RMStateStore.
    startRMsWithCustomizedRMAppManager();

    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();

    // Submit the application as the current user, with the configured
    // maximum number of AM attempts.
    final String submitter = UserGroupInformation.getCurrentUser().getShortUserName();
    final int maxAppAttempts = configuration.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    RMApp app0 = rm1.submitApp(200, "", submitter, null, false, null, maxAppAttempts, null, null, false, false);

    // Fail over and send the KillApplicationRequest; the kill is expected to
    // be rejected because the application is absent.
    try {
        failOverAndKillApp(app0.getApplicationId(), RMAppState.NEW);
        fail("Should get an exception here");
    } catch (ApplicationNotFoundException ex) {
        String expectedFragment = "Trying to kill an absent application " + app0.getApplicationId();
        Assert.assertTrue(ex.getMessage().contains(expectedFragment));
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) Test(org.junit.Test)

Example 72 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

From the class TestKillApplicationWithRMHA, the method killApplication:

/**
 * Kills the given application on {@code rm} and verifies the outcome.
 *
 * @param rm the resource manager the kill request is sent to
 * @param appId the application to kill
 * @param appAttemptId the attempt expected to reach KILLED, or {@code null}
 *        to skip the attempt-state check
 * @param rmAppState the application state used to decide whether the kill
 *        response should report completion
 * @throws Exception if the kill request or any of the state waits fails
 */
private void killApplication(MockRM rm, ApplicationId appId, ApplicationAttemptId appAttemptId, RMAppState rmAppState) throws Exception {
    KillApplicationResponse killResponse = rm.killApp(appId);

    // The response must report completion exactly when the given state is a
    // final state.
    boolean reportedCompleted = killResponse.getIsKillCompleted();
    boolean expectedCompleted = isFinalState(rmAppState);
    Assert.assertTrue(reportedCompleted == expectedCompleted);

    RMApp killedApp = rm.getRMContext().getRMApps().get(appId);
    rm.waitForState(appId, RMAppState.KILLED);
    if (appAttemptId != null) {
        rm.waitForState(appAttemptId, RMAppAttemptState.KILLED);
    }

    // Killing must not have created a new attempt.
    Assert.assertEquals(1, killedApp.getAppAttempts().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) KillApplicationResponse(org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse)

Example 73 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

From the class TestKillApplicationWithRMHA, the method testKillAppWhenFailOverHappensDuringApplicationKill:

/**
 * Checks killing an application when a failover happens after the kill
 * request was sent but before the application reached a final state.
 */
@Test(timeout = 20000)
public void testKillAppWhenFailOverHappensDuringApplicationKill() throws Exception {
    // create a customized ClientRMService
    // When receives the killApplicationRequest, simply return the response
    // and make sure the application will not be KILLED State
    startRMsWithCustomizedClientRMService();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    // ensure that the app is in running state
    // (expected value first, so a failure message reports the states the
    // right way round)
    Assert.assertEquals(RMAppState.RUNNING, app0.getState());
    // kill the app.
    rm1.killApp(app0.getApplicationId());
    // failover happens before this application goes to final state.
    // The RMAppState that will be loaded by the active rm
    // should be ACCEPTED.
    failOverAndKillApp(app0.getApplicationId(), am0.getApplicationAttemptId(), RMAppState.RUNNING, RMAppAttemptState.RUNNING, RMAppState.ACCEPTED);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Test(org.junit.Test)

Example 74 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

From the class TestAMAuthorization, the method testUnauthorizedAccess:

/**
 * Checks that registering an application master over a proxy created
 * without tokens is rejected with an {@link AccessControlException}.
 */
@Test
public void testUnauthorizedAccess() throws Exception {
    MyContainerManager containerManager = new MyContainerManager();
    rm = new MockRMWithAMS(conf, containerManager);
    rm.start();

    MockNM nm1 = rm.registerNode("localhost:1234", 5120);
    RMApp app = rm.submitApp(1024);
    nm1.nodeHeartbeat(true);

    // Poll (at most 40 times, one second apart) until the container manager
    // has received the container tokens.
    for (int waitCount = 0; containerManager.containerTokens == null && waitCount < 40; waitCount++) {
        LOG.info("Waiting for AM Launch to happen..");
        Thread.sleep(1000);
    }
    Assert.assertNotNull(containerManager.containerTokens);

    RMAppAttempt attempt = app.getCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.getAppAttemptId();
    waitForLaunchedState(attempt);

    final Configuration rmConf = rm.getConfig();
    final YarnRPC rpc = YarnRPC.create(rmConf);
    final InetSocketAddress serviceAddr = rmConf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);
    UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(applicationAttemptId.toString());

    // Build the client proxy as a remote user that carries no tokens.
    ApplicationMasterProtocol client = currentUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

        @Override
        public ApplicationMasterProtocol run() {
            return (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol.class, serviceAddr, rmConf);
        }
    });

    RegisterApplicationMasterRequest request = Records.newRecord(RegisterApplicationMasterRequest.class);
    try {
        client.registerApplicationMaster(request);
        Assert.fail("Should fail with authorization error");
    } catch (Exception e) {
        // Anything other than an access-control failure is unexpected.
        if (!isCause(AccessControlException.class, e)) {
            throw e;
        }
        // Because there are no tokens, the request should be rejected as the
        // server side will assume we are trying simple auth.
        String expectedMessage = UserGroupInformation.isSecurityEnabled()
                ? "Client cannot authenticate via:[TOKEN]"
                : "SIMPLE authentication is not enabled.  Available:[TOKEN]";
        Assert.assertTrue(e.getCause().getMessage().contains(expectedMessage));
    }
    // TODO: Add validation of invalid authorization when there's more data in
    // the AMRMToken
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) InetSocketAddress(java.net.InetSocketAddress) AccessControlException(org.apache.hadoop.security.AccessControlException) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnRPC(org.apache.hadoop.yarn.ipc.YarnRPC) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) RegisterApplicationMasterRequest(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 75 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

From the class AbstractYarnScheduler, the method recoverContainersOnNode:

/**
 * Recovers the containers reported by a node into the scheduler's
 * bookkeeping while holding the write lock.
 *
 * <p>Nothing is done when work-preserving recovery is disabled or when no
 * containers were reported. A reported container is killed as an orphan
 * instead of being recovered when its application is unknown to the RM
 * context, when no scheduler application exists for it, or, for applications
 * that do not keep containers across attempts, when the current scheduler
 * attempt is stopped or is not the attempt the container belongs to.
 *
 * @param containerReports the container statuses reported by the node; may
 *        be {@code null} or empty
 * @param nm the node that reported the containers
 */
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
    // Acquire the lock before entering the try block so the finally clause
    // never calls unlock() on a lock that was not acquired.
    writeLock.lock();
    try {
        if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || containerReports.isEmpty()) {
            return;
        }
        for (NMContainerStatus container : containerReports) {
            ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
            RMApp rmApp = rmContext.getRMApps().get(appId);
            if (rmApp == null) {
                LOG.error("Skip recovering container " + container + " for unknown application.");
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            SchedulerApplication<T> schedulerApp = applications.get(appId);
            if (schedulerApp == null) {
                LOG.info("Skip recovering container  " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            LOG.info("Recovering container " + container);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
            if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
                // Do not recover containers for stopped attempt or previous attempt.
                if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
                    LOG.info("Skip recovering container " + container + " for already stopped attempt.");
                    killOrphanContainerOnNode(nm, container);
                    continue;
                }
            }
            // create container
            RMContainer rmContainer = recoverAndCreateContainer(container, nm);
            // recover RMContainer
            rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
            // recover scheduler node
            SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
            schedulerNode.recoverContainer(rmContainer);
            // recover queue: update headroom etc.
            Queue queue = schedulerAttempt.getQueue();
            queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
            // recover scheduler attempt
            schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
            // set master container for the current running AMContainer for this
            // attempt.
            RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
            if (appAttempt != null) {
                Container masterContainer = appAttempt.getMasterContainer();
                // Flag the recovered container as the AM container when its ID
                // matches the master container ID stored in the app attempt.
                if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
                    ((RMContainerImpl) rmContainer).setAMContainer(true);
                }
            }
            // A container found in the attempt's pending-release set is
            // finished as RELEASED right after it has been recovered.
            if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
                // release the container
                rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
                LOG.info(container.getContainerId() + " is released by application.");
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainerRecoverEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent) Container(org.apache.hadoop.yarn.api.records.Container) RMContainerImpl(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Aggregations

RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)447 Test (org.junit.Test)350 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)132 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)124 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)116 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)105 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)99 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)97 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)91 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)68 Configuration (org.apache.hadoop.conf.Configuration)66 Container (org.apache.hadoop.yarn.api.records.Container)58 ArrayList (java.util.ArrayList)56 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)53 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)44 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)42 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)41 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)40