Search in sources :

Example 16 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestRMContainerAllocator method testCompletedTasksRecalculateSchedule.

/**
 * Checks when the allocator flags a recalculation of the reduce schedule:
 * the flag must stay false when schedule() runs again with the same
 * completed-map count, and must become true once getCompletedMaps()
 * reports a different value.
 */
@Test
public void testCompletedTasksRecalculateSchedule() throws Exception {
    LOG.info("Running testCompletedTasksRecalculateSchedule");
    Configuration conf = new Configuration();
    final MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher drainDispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();

    // Submit an application and register a node so the AM can be launched.
    RMApp submittedApp = rm.submitApp(1024);
    drainDispatcher.await();
    MockNM amNode = rm.registerNode("amNM:1234", 2048);
    amNode.nodeHeartbeat(true);
    drainDispatcher.await();

    ApplicationAttemptId attemptId = submittedApp.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(attemptId);
    drainDispatcher.await();

    // Mocked job: 10 maps, 10 reduces, no map completed yet.
    JobId mrJobId = MRBuilderUtils.newJobId(attemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(
        MRBuilderUtils.newJobReport(mrJobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    doReturn(10).when(mockJob).getTotalMaps();
    doReturn(10).when(mockJob).getTotalReduces();
    doReturn(0).when(mockJob).getCompletedMaps();

    RecalculateContainerAllocator recalcAllocator =
        new RecalculateContainerAllocator(rm, conf, attemptId, mockJob);

    // First pass, then clear the flag so the second pass is observed alone.
    recalcAllocator.schedule();
    recalcAllocator.recalculatedReduceSchedule = false;

    // Same completed-map count: no recalculation expected.
    recalcAllocator.schedule();
    Assert.assertFalse("Unexpected recalculate of reduce schedule",
        recalcAllocator.recalculatedReduceSchedule);

    // Completed-map count changes from 0 to 1: recalculation expected.
    doReturn(1).when(mockJob).getCompletedMaps();
    recalcAllocator.schedule();
    Assert.assertTrue("Expected recalculate of reduce schedule",
        recalcAllocator.recalculatedReduceSchedule);
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 17 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientOnAMRMTokenRollOverOnRMRestart.

// Verifies that an AM whose AMRMToken has been rolled over is still able to
// communicate with a restarted RM, and that the pre-roll-over token is
// rejected by the restarted RM with an InvalidToken error.
@Test(timeout = 30000)
public void testAMRMClientOnAMRMTokenRollOverOnRMRestart() throws Exception {
    conf.setLong(YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, rolling_interval_sec);
    conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start first RM
    MyResourceManager2 rm1 = new MyResourceManager2(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    // Primitive long: the value is read on every iteration of the wait loop
    // below, so a boxed Long would be unboxed each time for no benefit.
    long startTime = System.currentTimeMillis();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.getRMContext().getAMRMTokenSecretManager();
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
    amClient.init(conf);
    amClient.start();
    amClient.registerApplicationMaster("h1", 10000, "");
    amClient.allocate(0.1f);
    // In the meantime, the old AMRMToken should continue to work
    while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
        amClient.allocate(0.1f);
        // Let InterruptedException propagate (the method declares
        // "throws Exception") rather than swallowing it, so an interrupted
        // test stops promptly; this matches the second wait loop below.
        Thread.sleep(1000);
    }
    Assert.assertTrue(amrmTokenSecretManagerForRM1.getMasterKey().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId());
    amClient.allocate(0.1f);
    // activate the nextMasterKey, and replace the currentMasterKey
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    int waitCount = 0;
    while (waitCount++ <= 50) {
        // Stop waiting as soon as the current master key differs from the
        // key the original token was issued with.
        if (amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId()) {
            break;
        }
        try {
            amClient.allocate(0.1f);
        } catch (Exception ex) {
            // Any allocate failure ends the wait; the assertions that follow
            // decide whether the roll-over actually happened.
            break;
        }
        Thread.sleep(500);
    }
    Assert.assertNull(amrmTokenSecretManagerForRM1.getNextMasterKeyData());
    Assert.assertTrue(amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    // start 2nd RM
    conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(45020, 10));
    final MyResourceManager2 rm2 = new MyResourceManager2(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    ((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.getRMContext().getAMRMTokenSecretManager();
    // The restarted RM is expected to hold the rolled-over key as its
    // current key, with no pending next key.
    Assert.assertTrue(amrmTokenSecretManagerForRM2.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    Assert.assertNull(amrmTokenSecretManagerForRM2.getNextMasterKeyData());
    try {
        // Call allocate on rm2 as a separate user carrying only the old token.
        UserGroupInformation testUser = UserGroupInformation.createRemoteUser("testUser");
        SecurityUtil.setTokenService(token, rm2.getApplicationMasterService().getBindAddress());
        testUser.addToken(token);
        testUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() {
                return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, rm2.getApplicationMasterService().getBindAddress(), conf);
            }
        }).allocate(Records.newRecord(AllocateRequest.class));
        Assert.fail("The old Token should not work");
    } catch (Exception ex) {
        Assert.assertTrue(ex instanceof InvalidToken);
        Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + token.decodeIdentifier().getApplicationAttemptId()));
    }
    // make sure the recovered AMRMToken works for new RM
    amClient.allocate(0.1f);
    amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    amClient.stop();
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AMRMTokenSecretManager(org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager) IOException(java.io.IOException) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) PrivilegedAction(java.security.PrivilegedAction) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 18 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientForUnregisterAMOnRMRestart.

// Scenario: an AM registers with the first RM, the RM is replaced by a second
// instance sharing the same state store, and the AM unregisters against the
// second RM. The attempt and the application are then expected to reach
// FINISHED on the second RM.
@Test(timeout = 60000)
public void testAMRMClientForUnregisterAMOnRMRestart() throws Exception {
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);

    // Phase 1: bring up the first RM and submit an application to it.
    MyResourceManager firstRm = new MyResourceManager(conf, stateStore);
    firstRm.start();
    DrainDispatcher drainDispatcher = (DrainDispatcher) firstRm.getRMContext().getDispatcher();
    RMApp submittedApp = firstRm.submitApp(1024);
    drainDispatcher.await();

    // Register a node and heartbeat once.
    MockNM nodeManager = new MockNM("h1:1234", 15120, firstRm.getResourceTrackerService());
    nodeManager.registerNode();
    nodeManager.nodeHeartbeat(true);
    drainDispatcher.await();

    ApplicationAttemptId attemptId = submittedApp.getCurrentAppAttempt().getAppAttemptId();
    firstRm.sendAMLaunched(attemptId);
    drainDispatcher.await();

    // Hand the attempt's AMRMToken identifier to the current user.
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken = firstRm.getRMContext()
        .getRMApps()
        .get(attemptId.getApplicationId())
        .getRMAppAttempt(attemptId)
        .getAMRMToken();
    UserGroupInformation.getCurrentUser().addTokenIdentifier(amrmToken.decodeIdentifier());

    AMRMClient<ContainerRequest> client = new MyAMRMClientImpl(firstRm);
    client.init(conf);
    client.start();
    client.registerApplicationMaster("h1", 10000, "");
    client.allocate(0.1f);

    // Phase 2: start the second RM on the same state store and repoint
    // both the node manager and the AM client at it.
    MyResourceManager secondRm = new MyResourceManager(conf, stateStore);
    secondRm.start();
    nodeManager.setResourceTrackerService(secondRm.getResourceTrackerService());
    ((MyAMRMClientImpl) client).updateRMProxy(secondRm);
    drainDispatcher = (DrainDispatcher) secondRm.getRMContext().getDispatcher();

    // The first heartbeat to the second RM must be answered with RESYNC.
    NodeHeartbeatResponse resyncResponse = nodeManager.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, resyncResponse.getNodeAction());

    // A fresh MockNM stands in for the node re-registering; it reports
    // container 1 of the attempt as still running.
    nodeManager = new MockNM("h1:1234", 10240, secondRm.getResourceTrackerService());
    ContainerId amContainerId = ContainerId.newContainerId(attemptId, 1);
    NMContainerStatus recoveredContainer = NMContainerStatus.newInstance(
        amContainerId, 0, ContainerState.RUNNING, Resource.newInstance(1024, 1),
        "recover container", 0, Priority.newInstance(0), 0);
    nodeManager.registerNode(Arrays.asList(recoveredContainer), null);
    nodeManager.nodeHeartbeat(true);
    drainDispatcher.await();

    // Unregister against the second RM and follow the state transitions.
    client.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    secondRm.waitForState(attemptId, RMAppAttemptState.FINISHING);
    nodeManager.nodeHeartbeat(attemptId, 1, ContainerState.COMPLETE);
    secondRm.waitForState(attemptId, RMAppAttemptState.FINISHED);
    secondRm.waitForState(submittedApp.getApplicationId(), RMAppState.FINISHED);

    client.stop();
    firstRm.stop();
    secondRm.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 19 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestAMRMRPCNodeUpdates method testAMRMUnusableNodes.

/**
 * Checks the "updated nodes" list returned to AMs by allocate calls as node
 * state changes: an empty list when nothing changed, a single entry after a
 * node is reported UNHEALTHY, LOST, or RUNNING again, the same entry when the
 * identical request is resent, and an empty list for a newly registered AM's
 * first allocate call.
 *
 * The statements are order-sensitive: each new AllocateRequest is built from
 * the response id of the preceding response.
 */
@Test
public void testAMRMUnusableNodes() throws Exception {
    // Four nodes; nm1 and nm2 receive the heartbeats that launch the AMs,
    // nm3 and nm4 are the nodes whose state is changed below.
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10000);
    MockNM nm2 = rm.registerNode("127.0.0.2:1234", 10000);
    MockNM nm3 = rm.registerNode("127.0.0.3:1234", 10000);
    MockNM nm4 = rm.registerNode("127.0.0.4:1234", 10000);
    dispatcher.await();
    RMApp app1 = rm.submitApp(2000);
    // Trigger the scheduling so the AM gets 'launched' on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    // register AM returns no unusable node
    am1.registerAppAttempt();
    // allocate request returns no updated node
    AllocateRequest allocateRequest1 = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1);
    List<NodeReport> updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());
    // syncNodeHeartbeat is defined outside this block; presumably the boolean
    // is the node's health flag, since nm4 is expected to be UNHEALTHY next.
    syncNodeHeartbeat(nm4, false);
    // allocate request returns updated node
    allocateRequest1 = AllocateRequest.newInstance(response1.getResponseId(), 0F, null, null, null);
    response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1);
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    NodeReport nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState());
    // resending the allocate request returns the same result
    // (allocateRequest1 is deliberately NOT rebuilt here)
    response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1);
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, nr.getNodeState());
    // syncNodeLost is defined outside this block; nm3 is expected to be
    // reported as LOST afterwards.
    syncNodeLost(nm3);
    // subsequent allocate request returns delta
    allocateRequest1 = AllocateRequest.newInstance(response1.getResponseId(), 0F, null, null, null);
    response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1);
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm3.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.LOST, nr.getNodeState());
    // registering another AM gives it the complete failed list
    RMApp app2 = rm.submitApp(2000);
    // Trigger nm2 heartbeat so that AM gets launched on it
    nm2.nodeHeartbeat(true);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
    // register AM returns all unusable nodes
    am2.registerAppAttempt();
    // allocate request returns no updated node
    AllocateRequest allocateRequest2 = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse response2 = allocate(attempt2.getAppAttemptId(), allocateRequest2);
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());
    // Second sync heartbeat for nm4, this time with true; nm4 is expected to
    // be reported as RUNNING to both AMs below.
    syncNodeHeartbeat(nm4, true);
    // both AM's should get delta updated nodes
    allocateRequest1 = AllocateRequest.newInstance(response1.getResponseId(), 0F, null, null, null);
    response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1);
    updatedNodes = response1.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, nr.getNodeState());
    allocateRequest2 = AllocateRequest.newInstance(response2.getResponseId(), 0F, null, null, null);
    response2 = allocate(attempt2.getAppAttemptId(), allocateRequest2);
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(1, updatedNodes.size());
    nr = updatedNodes.iterator().next();
    Assert.assertEquals(nm4.getNodeId(), nr.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, nr.getNodeState());
    // subsequent allocate calls should return no updated nodes
    allocateRequest2 = AllocateRequest.newInstance(response2.getResponseId(), 0F, null, null, null);
    response2 = allocate(attempt2.getAppAttemptId(), allocateRequest2);
    updatedNodes = response2.getUpdatedNodes();
    Assert.assertEquals(0, updatedNodes.size());
    // Open question from the original author: how to do the above for a LOST node
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) Test(org.junit.Test)

Example 20 with MockNM

use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.

the class TestAMRMRPCResponseId method testARRMResponseId.

/**
 * Checks the response ids returned by successive allocate calls: the first
 * request (id 0) yields response id 1, a request carrying id 1 yields 2,
 * resending that same request yields 2 again, and a request that goes back
 * to id 0 is rejected with an exception whose cause is
 * InvalidApplicationMasterRequestException.
 */
@Test
public void testARRMResponseId() throws Exception {
    MockNM nm1 = rm.registerNode("h1:1234", 5000);
    RMApp app = rm.submitApp(2000);
    // Trigger the scheduling so the AM gets 'launched'
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();
    AllocateRequest allocateRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(1, response.getResponseId());
    // assertNull reports the unexpected value on failure, unlike
    // assertTrue(x == null).
    Assert.assertNull(response.getAMCommand());
    allocateRequest = AllocateRequest.newInstance(response.getResponseId(), 0F, null, null, null);
    response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(2, response.getResponseId());
    // try resending
    response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(2, response.getResponseId());
    // try sending old request again
    allocateRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    try {
        allocate(attempt.getAppAttemptId(), allocateRequest);
        Assert.fail("Expected the allocate call with an old response id to be rejected");
    } catch (Exception e) {
        // Assert.fail throws an AssertionError, which is not an Exception,
        // so it is not caught by this handler.
        Assert.assertTrue("Unexpected exception: " + e, e.getCause() instanceof InvalidApplicationMasterRequestException);
    }
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) InvalidApplicationMasterRequestException(org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Aggregations

MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)224 Test (org.junit.Test)218 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)196 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)128 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)127 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)79 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)69 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)48 ClientResponse (com.sun.jersey.api.client.ClientResponse)47 Configuration (org.apache.hadoop.conf.Configuration)47 WebResource (com.sun.jersey.api.client.WebResource)39 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)38 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)37 JSONObject (org.codehaus.jettison.json.JSONObject)37 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)36 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)33 Container (org.apache.hadoop.yarn.api.records.Container)29 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)28 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)23 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)22