Search in sources :

Example 61 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testAppAttemptTokensRestoredOnRMRestart.

/**
 * Checks that the credentials recorded for an application attempt in the state
 * store are available again once a second ResourceManager is started on the
 * same store: the client token master key, the secret manager's copy of that
 * key, and the AMRM token password.
 */
@Test(timeout = 60000)
public void testAppAttemptTokensRestoredOnRMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    UserGroupInformation.setConfiguration(conf);

    // Both ResourceManagers below share this in-memory store.
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    Map<ApplicationId, ApplicationStateData> storedApps = stateStore.getState().getApplicationState();

    MockRM firstRm = new TestSecurityMockRM(conf, stateStore);
    firstRm.start();
    MockNM node = new MockNM("0.0.0.0:4321", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // Submit an application and check that the store has an entry for it.
    RMApp submittedApp = firstRm.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), "default");
    ApplicationStateData storedApp = storedApps.get(submittedApp.getApplicationId());
    Assert.assertNotNull(storedApp);

    // Heartbeat the node, then wait for the attempt to reach ALLOCATED.
    node.nodeHeartbeat(true);
    RMAppAttempt originalAttempt = submittedApp.getCurrentAppAttempt();
    ApplicationAttemptId attemptId = originalAttempt.getAppAttemptId();
    firstRm.waitForState(attemptId, RMAppAttemptState.ALLOCATED);

    // The store must now hold the attempt together with its master container.
    ApplicationAttemptStateData storedAttempt = storedApp.getAttempt(attemptId);
    Assert.assertNotNull(storedAttempt);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId, 1), storedAttempt.getMasterContainer().getId());

    // Encoded form of the attempt's client token master key, used as the
    // expected value for the checks below.
    byte[] expectedMasterKey = originalAttempt.getClientTokenMasterKey().getEncoded();

    // The stored credentials must carry that same key.
    Credentials storedCredentials = storedAttempt.getAppAttemptTokens();
    Assert.assertArrayEquals("client token master key not saved", expectedMasterKey, storedCredentials.getSecretKey(RMStateStore.AM_CLIENT_TOKEN_MASTER_KEY_NAME));

    // Bring up a second ResourceManager on the same store.
    MockRM secondRm = new TestSecurityMockRM(conf, stateStore);
    secondRm.start();
    RMApp recoveredApp = secondRm.getRMContext().getRMApps().get(submittedApp.getApplicationId());
    RMAppAttempt recoveredAttempt = recoveredApp.getRMAppAttempt(attemptId);

    // The attempt itself must have been recovered.
    Assert.assertNotNull(recoveredAttempt);

    // The recovered attempt exposes the same client token master key.
    Assert.assertEquals("client token master key not restored", originalAttempt.getClientTokenMasterKey(), recoveredAttempt.getClientTokenMasterKey());

    // The second RM's client-to-AM token secret manager knows the key as well.
    Assert.assertArrayEquals(expectedMasterKey, secondRm.getClientToAMTokenSecretManager().getMasterKey(attemptId).getEncoded());

    // The second RM's AMRM token secret manager yields the token's password.
    Token<AMRMTokenIdentifier> recoveredToken = recoveredAttempt.getAMRMToken();
    byte[] tokenPassword = recoveredToken.getPassword();
    byte[] managerPassword = secondRm.getRMContext().getAMRMTokenSecretManager().retrievePassword(recoveredToken.decodeIdentifier());
    Assert.assertArrayEquals(tokenPassword, managerPassword);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 62 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMApplicationHistoryWriter method createRMAppAttempt.

/**
 * Builds a Mockito mock of {@link RMAppAttempt} with fixed, recognisable values.
 *
 * @param appAttemptId id the mock reports from {@code getAppAttemptId()}; also
 *                     used to derive the id of the mocked master container
 * @return the stubbed attempt mock
 */
private static RMAppAttempt createRMAppAttempt(ApplicationAttemptId appAttemptId) {
    // The master container is itself a mock; only its id is stubbed.
    Container masterContainer = mock(Container.class);
    when(masterContainer.getId()).thenReturn(ContainerId.newContainerId(appAttemptId, 1));

    RMAppAttempt attempt = mock(RMAppAttempt.class);

    // Identity and master container.
    when(attempt.getAppAttemptId()).thenReturn(appAttemptId);
    when(attempt.getMasterContainer()).thenReturn(masterContainer);

    // Endpoint details.
    when(attempt.getHost()).thenReturn("test host");
    when(attempt.getRpcPort()).thenReturn(-100);
    when(attempt.getTrackingUrl()).thenReturn("test url");

    // Outcome details.
    when(attempt.getDiagnostics()).thenReturn("test diagnostics info");
    when(attempt.getFinalApplicationStatus()).thenReturn(FinalApplicationStatus.UNDEFINED);

    return attempt;
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)

Example 63 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestAMRMRPCNodeUpdates method testAMRMUnusableNodes.

/**
 * Exercises the updated-nodes list carried by allocate responses: it starts
 * empty, reports a node after an unhealthy heartbeat, repeats the same report
 * when the request is resent, reports a lost node as a delta, and reports a
 * node returning to RUNNING to two registered application masters.
 */
@Test
public void testAMRMUnusableNodes() throws Exception {
    MockNM firstNode = rm.registerNode("127.0.0.1:1234", 10000);
    MockNM secondNode = rm.registerNode("127.0.0.2:1234", 10000);
    MockNM thirdNode = rm.registerNode("127.0.0.3:1234", 10000);
    MockNM fourthNode = rm.registerNode("127.0.0.4:1234", 10000);
    dispatcher.await();

    RMApp firstApp = rm.submitApp(2000);
    // Heartbeat the first node so scheduling can place the AM there.
    firstNode.nodeHeartbeat(true);
    RMAppAttempt firstAttempt = firstApp.getCurrentAppAttempt();
    MockAM firstAm = rm.sendAMLaunched(firstAttempt.getAppAttemptId());
    firstAm.registerAppAttempt();

    // First allocate: no node updates expected yet.
    AllocateRequest firstRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse firstResponse = allocate(firstAttempt.getAppAttemptId(), firstRequest);
    List<NodeReport> changedNodes = firstResponse.getUpdatedNodes();
    Assert.assertEquals(0, changedNodes.size());

    syncNodeHeartbeat(fourthNode, false);

    // Next allocate is expected to report the fourth node as UNHEALTHY.
    firstRequest = AllocateRequest.newInstance(firstResponse.getResponseId(), 0F, null, null, null);
    firstResponse = allocate(firstAttempt.getAppAttemptId(), firstRequest);
    changedNodes = firstResponse.getUpdatedNodes();
    Assert.assertEquals(1, changedNodes.size());
    NodeReport report = changedNodes.get(0);
    Assert.assertEquals(fourthNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, report.getNodeState());

    // Sending the very same request again must yield the same report.
    firstResponse = allocate(firstAttempt.getAppAttemptId(), firstRequest);
    changedNodes = firstResponse.getUpdatedNodes();
    Assert.assertEquals(1, changedNodes.size());
    report = changedNodes.get(0);
    Assert.assertEquals(fourthNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(NodeState.UNHEALTHY, report.getNodeState());

    syncNodeLost(thirdNode);

    // A follow-up allocate only carries the delta: the third node, now LOST.
    firstRequest = AllocateRequest.newInstance(firstResponse.getResponseId(), 0F, null, null, null);
    firstResponse = allocate(firstAttempt.getAppAttemptId(), firstRequest);
    changedNodes = firstResponse.getUpdatedNodes();
    Assert.assertEquals(1, changedNodes.size());
    report = changedNodes.get(0);
    Assert.assertEquals(thirdNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(NodeState.LOST, report.getNodeState());

    // Bring up a second application; its AM is placed via the second node.
    RMApp secondApp = rm.submitApp(2000);
    secondNode.nodeHeartbeat(true);
    RMAppAttempt secondAttempt = secondApp.getCurrentAppAttempt();
    MockAM secondAm = rm.sendAMLaunched(secondAttempt.getAppAttemptId());
    secondAm.registerAppAttempt();

    // The second AM's first allocate carries no node updates.
    AllocateRequest secondRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse secondResponse = allocate(secondAttempt.getAppAttemptId(), secondRequest);
    changedNodes = secondResponse.getUpdatedNodes();
    Assert.assertEquals(0, changedNodes.size());

    syncNodeHeartbeat(fourthNode, true);

    // Both AMs are expected to see the fourth node reported as RUNNING.
    firstRequest = AllocateRequest.newInstance(firstResponse.getResponseId(), 0F, null, null, null);
    firstResponse = allocate(firstAttempt.getAppAttemptId(), firstRequest);
    changedNodes = firstResponse.getUpdatedNodes();
    Assert.assertEquals(1, changedNodes.size());
    report = changedNodes.get(0);
    Assert.assertEquals(fourthNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, report.getNodeState());

    secondRequest = AllocateRequest.newInstance(secondResponse.getResponseId(), 0F, null, null, null);
    secondResponse = allocate(secondAttempt.getAppAttemptId(), secondRequest);
    changedNodes = secondResponse.getUpdatedNodes();
    Assert.assertEquals(1, changedNodes.size());
    report = changedNodes.get(0);
    Assert.assertEquals(fourthNode.getNodeId(), report.getNodeId());
    Assert.assertEquals(NodeState.RUNNING, report.getNodeState());

    // With nothing further changing, the next allocate reports no nodes.
    secondRequest = AllocateRequest.newInstance(secondResponse.getResponseId(), 0F, null, null, null);
    secondResponse = allocate(secondAttempt.getAppAttemptId(), secondRequest);
    changedNodes = secondResponse.getUpdatedNodes();
    Assert.assertEquals(0, changedNodes.size());
    // Open question: how to perform the equivalent check for a LOST node.
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) Test(org.junit.Test)

Example 64 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestAMRMRPCResponseId method testARRMResponseId.

/**
 * Verifies response id handling of the allocate call: the id increases by one
 * for each new request, resending the latest request returns the same id, and
 * a request carrying an id older than the previous one is rejected with an
 * {@link InvalidApplicationMasterRequestException} as the cause.
 */
@Test
public void testARRMResponseId() throws Exception {
    MockNM nm1 = rm.registerNode("h1:1234", 5000);
    RMApp app = rm.submitApp(2000);
    // Trigger the scheduling so the AM gets 'launched'
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();

    // First request carries id 0; the response must carry id 1 and no command.
    AllocateRequest allocateRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    AllocateResponse response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(1, response.getResponseId());
    // assertNull reports the unexpected value on failure, unlike assertTrue(x == null).
    Assert.assertNull(response.getAMCommand());

    // Follow-up request echoes the last response id; the id advances to 2.
    allocateRequest = AllocateRequest.newInstance(response.getResponseId(), 0F, null, null, null);
    response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(2, response.getResponseId());

    // Resending the same request must return the same response id.
    response = allocate(attempt.getAppAttemptId(), allocateRequest);
    Assert.assertEquals(2, response.getResponseId());

    // Sending a request with an old id (0) again must be rejected.
    allocateRequest = AllocateRequest.newInstance(0, 0F, null, null, null);
    try {
        allocate(attempt.getAppAttemptId(), allocateRequest);
        // fail() throws AssertionError, which the catch (Exception) below does not intercept.
        Assert.fail("allocate with an outdated response id should have been rejected");
    } catch (Exception e) {
        Assert.assertTrue("unexpected cause: " + e.getCause(), e.getCause() instanceof InvalidApplicationMasterRequestException);
    }
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) InvalidApplicationMasterRequestException(org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 65 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestSystemMetricsPublisher method testPublishAppAttemptMetricsForUnmanagedAM.

/**
 * Publishes a "registered" and a "finished" event for an attempt created with
 * {@code createRMAppAttempt(appAttemptId, true)} and polls the store until the
 * attempt's entity holds at least two events. There is no explicit assertion;
 * the test only ends when the loop exits or the 10 second timeout fires.
 */
@Test(timeout = 10000)
public void testPublishAppAttemptMetricsForUnmanagedAM() throws Exception {
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1);
    RMAppAttempt attempt = createRMAppAttempt(appAttemptId, true);
    metricsPublisher.appAttemptRegistered(attempt, Integer.MAX_VALUE + 1L);

    RMApp owningApp = mock(RMApp.class);
    when(owningApp.getFinalApplicationStatus()).thenReturn(FinalApplicationStatus.UNDEFINED);
    metricsPublisher.appAttemptFinished(attempt, RMAppAttemptState.FINISHED, owningApp, Integer.MAX_VALUE + 2L);

    // Keep querying the store until both events are visible on the entity.
    while (true) {
        TimelineEntity published = store.getEntity(appAttemptId.toString(), AppAttemptMetricsConstants.ENTITY_TYPE, EnumSet.allOf(Field.class));
        if (published != null && published.getEvents().size() >= 2) {
            break;
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity) Test(org.junit.Test)

Aggregations

RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)123 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)91 Test (org.junit.Test)71 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)40 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)35 Container (org.apache.hadoop.yarn.api.records.Container)31 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)30 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)28 ArrayList (java.util.ArrayList)26 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)22 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)22 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)21 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)19 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)18 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)16 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)14 HashMap (java.util.HashMap)13 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)13 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)12