Search in sources :

Example 16 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestRMContainerAllocator method testCompletedTasksRecalculateSchedule.

/**
 * Checks when the allocator flags a recalculation of the reduce schedule:
 * a second {@code schedule()} pass with an unchanged completed-map count must
 * leave {@code recalculatedReduceSchedule} false, and a pass after the mocked
 * job reports one more completed map must set it to true.
 *
 * @throws Exception if the RM, node manager or allocator setup fails
 */
@Test
public void testCompletedTasksRecalculateSchedule() throws Exception {
    LOG.info("Running testCompletedTasksRecalculateSchedule");
    Configuration conf = new Configuration();
    final MyResourceManager rm = new MyResourceManager(conf);
    try {
        rm.start();
        DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
        // Submit the application
        RMApp app = rm.submitApp(1024);
        dispatcher.await();
        // Make a node to register so as to launch the AM.
        MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
        amNodeManager.nodeHeartbeat(true);
        dispatcher.await();
        ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
        rm.sendAMLaunched(appAttemptId);
        dispatcher.await();
        JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
        // Mocked job: 10 maps, 10 reduces, and initially no completed maps.
        Job job = mock(Job.class);
        when(job.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
        doReturn(10).when(job).getTotalMaps();
        doReturn(10).when(job).getTotalReduces();
        doReturn(0).when(job).getCompletedMaps();
        RecalculateContainerAllocator allocator = new RecalculateContainerAllocator(rm, conf, appAttemptId, job);
        // First pass; clear the flag afterwards so only later passes are observed.
        allocator.schedule();
        allocator.recalculatedReduceSchedule = false;
        // Completed-map count is unchanged: no recalculation expected.
        allocator.schedule();
        Assert.assertFalse("Unexpected recalculate of reduce schedule", allocator.recalculatedReduceSchedule);
        // One map completes: the next pass must recalculate.
        doReturn(1).when(job).getCompletedMaps();
        allocator.schedule();
        Assert.assertTrue("Expected recalculate of reduce schedule", allocator.recalculatedReduceSchedule);
    } finally {
        // Always stop the RM, even when an assertion fails, so it does not
        // outlive this test.
        rm.stop();
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 17 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientOnAMRMTokenRollOverOnRMRestart.

// Verifies that an AM whose AMRMToken has been rolled over can still
// communicate with a restarted RM, while the token created before the
// roll-over is rejected by the restarted RM with InvalidToken.
@Test(timeout = 30000)
public void testAMRMClientOnAMRMTokenRollOverOnRMRestart() throws Exception {
    conf.setLong(YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, rolling_interval_sec);
    conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start first RM
    MyResourceManager2 rm1 = new MyResourceManager2(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    // Primitive long: the value is only used in arithmetic, boxing is unnecessary.
    long startTime = System.currentTimeMillis();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.getRMContext().getAMRMTokenSecretManager();
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
    amClient.init(conf);
    amClient.start();
    amClient.registerApplicationMaster("h1", 10000, "");
    amClient.allocate(0.1f);
    // In the meantime, the old AMRMToken should continue to work
    while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
        amClient.allocate(0.1f);
        // InterruptedException is deliberately not caught: swallowing it would
        // keep this loop spinning after the test thread has been interrupted.
        // The second wait loop below already lets it propagate.
        Thread.sleep(1000);
    }
    Assert.assertTrue("Master key should differ from the key of the original token after the rolling interval", amrmTokenSecretManagerForRM1.getMasterKey().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId());
    amClient.allocate(0.1f);
    // activate the nextMasterKey, and replace the currentMasterKey
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    // Poll (bounded) until the current master key no longer matches the old token.
    int waitCount = 0;
    while (waitCount++ <= 50) {
        if (amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId()) {
            break;
        }
        try {
            amClient.allocate(0.1f);
        } catch (Exception ex) {
            // Stop polling on any allocate failure; the assertions below decide the outcome.
            break;
        }
        Thread.sleep(500);
    }
    Assert.assertNull("RM1 should have no pending next master key", amrmTokenSecretManagerForRM1.getNextMasterKeyData());
    Assert.assertTrue("RM1 current master key should match the new token", amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    // start 2nd RM
    conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(45020, 10));
    final MyResourceManager2 rm2 = new MyResourceManager2(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    ((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.getRMContext().getAMRMTokenSecretManager();
    Assert.assertTrue("RM2 current master key should match the new token", amrmTokenSecretManagerForRM2.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    Assert.assertNull("RM2 should have no pending next master key", amrmTokenSecretManagerForRM2.getNextMasterKeyData());
    try {
        UserGroupInformation testUser = UserGroupInformation.createRemoteUser("testUser");
        SecurityUtil.setTokenService(token, rm2.getApplicationMasterService().getBindAddress());
        testUser.addToken(token);
        testUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() {
                return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, rm2.getApplicationMasterService().getBindAddress(), conf);
            }
        }).allocate(Records.newRecord(AllocateRequest.class));
        // Assert.fail throws AssertionError, which the catch below does not intercept.
        Assert.fail("The old Token should not work");
    } catch (Exception ex) {
        Assert.assertTrue(ex instanceof InvalidToken);
        Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + token.decodeIdentifier().getApplicationAttemptId()));
    }
    // make sure the recovered AMRMToken works for new RM
    amClient.allocate(0.1f);
    amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    amClient.stop();
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AMRMTokenSecretManager(org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager) IOException(java.io.IOException) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) PrivilegedAction(java.security.PrivilegedAction) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 18 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientForUnregisterAMOnRMRestart.

// Scenario under test:
// an AM registers with the first RM and, once a second RM built on the same
// state store has been started and wired in, unregisters without the test
// explicitly registering again. The attempt and the application are then
// expected to reach FINISHED on the second RM.
@Test(timeout = 60000)
public void testAMRMClientForUnregisterAMOnRMRestart() throws Exception {
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);

    // Phase 1: bring up the first RM and submit an application to it.
    MyResourceManager firstRm = new MyResourceManager(conf, stateStore);
    firstRm.start();
    DrainDispatcher drain = (DrainDispatcher) firstRm.getRMContext().getDispatcher();
    RMApp submittedApp = firstRm.submitApp(1024);
    drain.await();

    // Register a node and heartbeat once.
    MockNM nodeManager = new MockNM("h1:1234", 15120, firstRm.getResourceTrackerService());
    nodeManager.registerNode();
    nodeManager.nodeHeartbeat(true);
    drain.await();

    ApplicationAttemptId attemptId = submittedApp.getCurrentAppAttempt().getAppAttemptId();
    firstRm.sendAMLaunched(attemptId);
    drain.await();

    // Hand the attempt's AMRM token identifier to the current user before the client starts.
    RMApp trackedApp = firstRm.getRMContext().getRMApps().get(attemptId.getApplicationId());
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken = trackedApp.getRMAppAttempt(attemptId).getAMRMToken();
    UserGroupInformation.getCurrentUser().addTokenIdentifier(amrmToken.decodeIdentifier());

    AMRMClient<ContainerRequest> client = new MyAMRMClientImpl(firstRm);
    client.init(conf);
    client.start();
    client.registerApplicationMaster("h1", 10000, "");
    client.allocate(0.1f);

    // Phase 2: start the second RM on the same store and redirect NM and client to it.
    MyResourceManager secondRm = new MyResourceManager(conf, stateStore);
    secondRm.start();
    nodeManager.setResourceTrackerService(secondRm.getResourceTrackerService());
    ((MyAMRMClientImpl) client).updateRMProxy(secondRm);
    drain = (DrainDispatcher) secondRm.getRMContext().getDispatcher();

    // The very first heartbeat to the second RM must ask the node to resync.
    NodeHeartbeatResponse resyncResponse = nodeManager.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, resyncResponse.getNodeAction());

    // A fresh MockNM stands in for the node re-registering; it reports
    // container 1 of the attempt as still running.
    nodeManager = new MockNM("h1:1234", 10240, secondRm.getResourceTrackerService());
    ContainerId firstContainerId = ContainerId.newContainerId(attemptId, 1);
    NMContainerStatus recoveredStatus = NMContainerStatus.newInstance(firstContainerId, 0, ContainerState.RUNNING, Resource.newInstance(1024, 1), "recover container", 0, Priority.newInstance(0), 0);
    nodeManager.registerNode(Arrays.asList(recoveredStatus), null);
    nodeManager.nodeHeartbeat(true);
    drain.await();

    // Unregister against the second RM and follow the attempt/app to completion.
    client.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    secondRm.waitForState(attemptId, RMAppAttemptState.FINISHING);
    nodeManager.nodeHeartbeat(attemptId, 1, ContainerState.COMPLETE);
    secondRm.waitForState(attemptId, RMAppAttemptState.FINISHED);
    secondRm.waitForState(submittedApp.getApplicationId(), RMAppState.FINISHED);

    client.stop();
    firstRm.stop();
    secondRm.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 19 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class TestYarnClient method testApplicationType.

/**
 * Checks the application type reported by the RM: an app submitted through the
 * single-argument {@code submitApp} must report "YARN", and an app submitted
 * with the explicit type argument "MAPREDUCE" must report "MAPREDUCE".
 *
 * @throws Exception if the RM cannot be started or an application cannot be submitted
 */
@Test(timeout = 30000)
public void testApplicationType() throws Exception {
    Logger rootLogger = LogManager.getRootLogger();
    // Remember the current level so the DEBUG override does not affect
    // whatever runs after this test in the same JVM.
    Level previousLevel = rootLogger.getLevel();
    rootLogger.setLevel(Level.DEBUG);
    MockRM rm = new MockRM();
    try {
        rm.start();
        RMApp app = rm.submitApp(2000);
        RMApp app1 = rm.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE");
        Assert.assertEquals("YARN", app.getApplicationType());
        Assert.assertEquals("MAPREDUCE", app1.getApplicationType());
    } finally {
        try {
            // Stop the RM even when an assertion above fails.
            rm.stop();
        } finally {
            rootLogger.setLevel(previousLevel);
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Logger(org.apache.log4j.Logger) Test(org.junit.Test)

Example 20 with RMApp

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.

the class ApplicationMasterService method registerApplicationMaster.

/**
 * Registers an application master for the application attempt identified by
 * the caller's AMRM token.
 *
 * <p>On success the response carries the scheduler's maximum resource
 * capability for the app's queue, the application ACLs, the queue name, the
 * scheduler resource types, the client-to-AM token master key when security is
 * enabled, and, when the app keeps containers across attempts, the containers
 * transferred from previous attempts together with NM tokens for them.
 *
 * @param request registration request; its host, RPC port and tracking URL
 *                are forwarded in the registration event
 * @return the populated registration response
 * @throws YarnException if the attempt has no entry in the response map, or
 *                       ({@link InvalidApplicationMasterRequestException}) if
 *                       the application master is already registered
 * @throws IOException   including {@link UnknownHostException} when NM token
 *                       creation fails because of an unknown host
 */
@Override
public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws YarnException, IOException {
    AMRMTokenIdentifier amrmTokenIdentifier = YarnServerSecurityUtils.authorizeRequest();
    ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.getApplicationAttemptId();
    ApplicationId appID = applicationAttemptId.getApplicationId();
    AllocateResponseLock lock = responseMap.get(applicationAttemptId);
    if (lock == null) {
        // The application may be missing from the RM's application map as
        // well. Look the user up defensively so the audit entry is written and
        // the intended "does not exist in cache" error is raised instead of a
        // NullPointerException.
        RMApp unknownApp = this.rmContext.getRMApps().get(appID);
        String user = (unknownApp == null) ? null : unknownApp.getUser();
        RMAuditLogger.logFailure(user, AuditConstants.REGISTER_AM, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService", "Error in registering application master", appID, applicationAttemptId);
        throwApplicationDoesNotExistInCacheException(applicationAttemptId);
    }
    // Allow only one thread in AM to do registerApp at a time.
    synchronized (lock) {
        AllocateResponse lastResponse = lock.getAllocateResponse();
        if (hasApplicationMasterRegistered(applicationAttemptId)) {
            String message = "Application Master is already registered : " + appID;
            LOG.warn(message);
            RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, "", "ApplicationMasterService", message, appID, applicationAttemptId);
            throw new InvalidApplicationMasterRequestException(message);
        }
        this.amLivelinessMonitor.receivedPing(applicationAttemptId);
        RMApp app = this.rmContext.getRMApps().get(appID);
        // Setting the response id to 0 to identify if the
        // application master is register for the respective attemptid
        lastResponse.setResponseId(0);
        lock.setAllocateResponse(lastResponse);
        LOG.info("AM registration " + applicationAttemptId);
        this.rmContext.getDispatcher().getEventHandler().handle(new RMAppAttemptRegistrationEvent(applicationAttemptId, request.getHost(), request.getRpcPort(), request.getTrackingUrl()));
        RMAuditLogger.logSuccess(app.getUser(), AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, applicationAttemptId);
        // Pick up min/max resource from scheduler...
        RegisterApplicationMasterResponse response = recordFactory.newRecordInstance(RegisterApplicationMasterResponse.class);
        response.setMaximumResourceCapability(rScheduler.getMaximumResourceCapability(app.getQueue()));
        response.setApplicationACLs(app.getRMAppAttempt(applicationAttemptId).getSubmissionContext().getAMContainerSpec().getApplicationACLs());
        response.setQueue(app.getQueue());
        if (UserGroupInformation.isSecurityEnabled()) {
            LOG.info("Setting client token master key");
            response.setClientToAMTokenMasterKey(java.nio.ByteBuffer.wrap(rmContext.getClientToAMTokenSecretManager().getMasterKey(applicationAttemptId).getEncoded()));
        }
        // When containers are kept across application attempts, hand over the
        // containers transferred from previous attempts and corresponding NM tokens.
        if (app.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
            List<Container> transferredContainers = rScheduler.getTransferredContainers(applicationAttemptId);
            if (!transferredContainers.isEmpty()) {
                response.setContainersFromPreviousAttempts(transferredContainers);
                List<NMToken> nmTokens = new ArrayList<NMToken>();
                for (Container container : transferredContainers) {
                    try {
                        NMToken token = rmContext.getNMTokenSecretManager().createAndGetNMToken(app.getUser(), applicationAttemptId, container);
                        if (null != token) {
                            nmTokens.add(token);
                        }
                    } catch (IllegalArgumentException e) {
                        // If the failure was caused by an unknown host, rethrow
                        // that cause directly; per the original note it
                        // will be automatically retried by RMProxy in RPC layer.
                        if (e.getCause() instanceof UnknownHostException) {
                            throw (UnknownHostException) e.getCause();
                        }
                        // Any other cause: keep the best-effort behaviour (the
                        // container simply gets no NM token) but leave a trace
                        // instead of dropping the exception silently.
                        LOG.warn("Skipping NM token for a transferred container of " + applicationAttemptId, e);
                    }
                }
                response.setNMTokensFromPreviousAttempts(nmTokens);
                LOG.info("Application " + appID + " retrieved " + transferredContainers.size() + " containers from previous" + " attempts and " + nmTokens.size() + " NM tokens.");
            }
        }
        response.setSchedulerResourceTypes(rScheduler.getSchedulingResourceTypes());
        return response;
    }
}
Also used : InvalidApplicationMasterRequestException(org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NMToken(org.apache.hadoop.yarn.api.records.NMToken) UnknownHostException(java.net.UnknownHostException) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) UpdatedContainer(org.apache.hadoop.yarn.api.records.UpdatedContainer) PreemptionContainer(org.apache.hadoop.yarn.api.records.PreemptionContainer) Container(org.apache.hadoop.yarn.api.records.Container) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMAppAttemptRegistrationEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent)

Aggregations

RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)447 Test (org.junit.Test)350 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)132 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)124 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)116 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)105 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)99 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)97 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)91 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)68 Configuration (org.apache.hadoop.conf.Configuration)66 Container (org.apache.hadoop.yarn.api.records.Container)58 ArrayList (java.util.ArrayList)56 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)53 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)44 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)42 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)41 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)40