Search in sources :

Example 31 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testRMRestartOnMissingAttempts.

@Test(timeout = 60000)
public void testRMRestartOnMissingAttempts() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create an app and finish the app.
    RMApp app0 = rm1.submitApp(200);
    ApplicationStateData app0State = memStore.getState().getApplicationState().get(app0.getApplicationId());
    MockAM am0 = launchAndFailAM(app0, rm1, nm1);
    MockAM am1 = launchAndFailAM(app0, rm1, nm1);
    MockAM am2 = launchAndFailAM(app0, rm1, nm1);
    MockAM am3 = launchAM(app0, rm1, nm1);
    // am1 is missed from MemoryRMStateStore
    memStore.removeApplicationAttemptInternal(am1.getApplicationAttemptId());
    ApplicationAttemptStateData am2State = app0State.getAttempt(am2.getApplicationAttemptId());
    // am2's state is not consistent: MemoryRMStateStore just saved its initial
    // state and failed to store its final state
    am2State.setState(null);
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
    RMApp recoveredApp0 = rm2.getRMContext().getRMApps().values().iterator().next();
    Map<ApplicationAttemptId, RMAppAttempt> recoveredAppAttempts = recoveredApp0.getAppAttempts();
    Assert.assertEquals(3, recoveredAppAttempts.size());
    Assert.assertEquals(RMAppAttemptState.FAILED, recoveredAppAttempts.get(am0.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(RMAppAttemptState.FAILED, recoveredAppAttempts.get(am2.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(RMAppAttemptState.LAUNCHED, recoveredAppAttempts.get(am3.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(5, ((RMAppImpl) app0).getNextAttemptId());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Test(org.junit.Test)

Example 32 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testRMRestartTimelineCollectorContext.

@Test(timeout = 60000)
public void testRMRestartTimelineCollectorContext() throws Exception {
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = null;
    MockRM rm2 = null;
    try {
        rm1 = createMockRM(conf, memStore);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();
        // submit an app.
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
        // Check if app info has been saved.
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app.getApplicationSubmissionContext().getApplicationId());
        // Allocate the AM
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        ApplicationId appId = app.getApplicationId();
        TimelineCollectorContext contextBeforeRestart = rm1.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
        // Restart RM.
        rm2 = createMockRM(conf, memStore);
        rm2.start();
        Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
        rm2.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
        TimelineCollectorContext contextAfterRestart = rm2.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
        Assert.assertEquals("Collector contexts for an app should be same " + "across restarts", contextBeforeRestart, contextAfterRestart);
    } finally {
        conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, false);
        if (rm1 != null) {
            rm1.close();
        }
        if (rm2 != null) {
            rm2.close();
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) TimelineCollectorContext(org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 33 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testRMRestartOnMaxAppAttempts.

@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // submit an app with maxAppAttempts equals to 1
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
    // submit an app with maxAppAttempts equals to -1
    RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
    // assert app1 info is saved
    ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
    // Allocate the AM
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // Setting AMLivelinessMonitor interval to be 3 Secs.
    conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
    // start new RM   
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    // verify that maxAppAttempts is set to global value
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());
    // app1 and app2 are loaded back, but app1 failed because it's
    // hitting max-retry.
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
    rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
    rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
    // app1 failed state is saved in state store. app2 final saved state is not
    // determined yet.
    Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
    Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Test(org.junit.Test)

Example 34 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRM method testNMTokenSentForNormalContainer.

// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
@Test(timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
    conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName());
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("h1:1234", 5120);
    RMApp app = rm.submitApp(2000);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    // Call getNewContainerId to increase container Id so that the AM container
    // Id doesn't equal to one.
    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
    cs.getApplicationAttempt(attempt.getAppAttemptId()).getNewContainerId();
    MockAM am = MockRM.launchAM(app, rm, nm1);
    // am container Id not equal to 1.
    Assert.assertTrue(attempt.getMasterContainer().getId().getContainerId() != 1);
    // NMSecretManager doesn't record the node on which the am is allocated.
    Assert.assertFalse(rm.getRMContext().getNMTokenSecretManager().isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
    am.registerAppAttempt();
    rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
    int NUM_CONTAINERS = 1;
    List<Container> containers = new ArrayList<Container>();
    // nmTokens keeps track of all the nmTokens issued in the allocate call.
    List<NMToken> expectedNMTokens = new ArrayList<NMToken>();
    // am1 allocate 1 container on nm1.
    while (true) {
        AllocateResponse response = am.allocate("127.0.0.1", 2000, NUM_CONTAINERS, new ArrayList<ContainerId>());
        nm1.nodeHeartbeat(true);
        containers.addAll(response.getAllocatedContainers());
        expectedNMTokens.addAll(response.getNMTokens());
        if (containers.size() == NUM_CONTAINERS) {
            break;
        }
        Thread.sleep(200);
        System.out.println("Waiting for container to be allocated.");
    }
    NodeId nodeId = expectedNMTokens.get(0).getNodeId();
    // NMToken is sent for the allocated container.
    Assert.assertEquals(nm1.getNodeId(), nodeId);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NMToken(org.apache.hadoop.yarn.api.records.NMToken) ArrayList(java.util.ArrayList) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 35 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRM method testActivatingApplicationAfterAddingNM.

@Test(timeout = 300000)
public void testActivatingApplicationAfterAddingNM() throws Exception {
    MockRM rm1 = new MockRM(conf);
    // start like normal because state is empty
    rm1.start();
    // app that gets launched
    RMApp app1 = rm1.submitApp(200);
    // app that does not get launched
    RMApp app2 = rm1.submitApp(200);
    // app1 and app2 should be scheduled, but because no resource is available,
    // they are not activated.
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
    rm1.waitForState(attemptId2, RMAppAttemptState.SCHEDULED);
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:5678", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    //kick the scheduling
    nm1.nodeHeartbeat(true);
    // app1 should be allocated now
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    rm1.waitForState(attemptId2, RMAppAttemptState.SCHEDULED);
    nm2.nodeHeartbeat(true);
    // app2 should be allocated now
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    rm1.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
    rm1.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Test(org.junit.Test)

Aggregations

RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)123 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)91 Test (org.junit.Test)71 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)40 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)35 Container (org.apache.hadoop.yarn.api.records.Container)31 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)30 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)28 ArrayList (java.util.ArrayList)26 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)22 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)22 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)21 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)19 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)18 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)16 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)14 HashMap (java.util.HashMap)13 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)13 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)12