Search in sources :

Example 6 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestNodeBlacklistingOnAMFailures method startRM.

private MockRM startRM(YarnConfiguration conf, final DrainDispatcher dispatcher) {
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore) {

        @Override
        protected Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    rm1.start();
    return rm1;
}
Also used : MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)

Example 7 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestApplicationCleanup method testContainerCleanupWhenRMRestartedAppNotRegistered.

@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testContainerCleanupWhenRMRestartedAppNotRegistered() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    final DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm1 = new MockRM(conf, memStore) {

        @Override
        protected Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    rm1.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
    // start new RM
    final DrainDispatcher dispatcher2 = new DrainDispatcher();
    MockRM rm2 = new MockRM(conf, memStore) {

        @Override
        protected Dispatcher createDispatcher() {
            return dispatcher2;
        }
    };
    rm2.start();
    // nm1 register to rm2, and do a heartbeat
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm1.registerNode(Arrays.asList(app0.getApplicationId()));
    rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
    // Add unknown container for application unknown to scheduler
    NodeHeartbeatResponse response = nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    waitForContainerCleanup(dispatcher2, nm1, response);
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) Test(org.junit.Test)

Example 8 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestApplicationCleanup method testAppCleanupWhenRMRestartedAfterAppFinished.

@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testAppCleanupWhenRMRestartedAfterAppFinished() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(app0.getApplicationId(), RMAppState.FAILED);
    // start new RM
    MockRM rm2 = new MockRM(conf, memStore);
    rm2.start();
    // nm1 register to rm2, and do a heartbeat
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm1.registerNode(Arrays.asList(app0.getApplicationId()));
    rm2.waitForState(app0.getApplicationId(), RMAppState.FAILED);
    // wait for application cleanup message received
    waitForAppCleanupMessageRecved(nm1, app0.getApplicationId());
    rm1.stop();
    rm2.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) Test(org.junit.Test)

Example 9 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestApplicationCleanup method testProcessingNMContainerStatusesOnNMRestart.

// The test verifies processing of NMContainerStatuses which are sent during
// NM registration.
// 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
// 2. AM sends ResourceRequest for 1 container with memory 2048MB.
// 3. Verify for number of container allocated by RM
// 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
// memory. 1024 + 2048=3072
// 5. Re-register NM by sending completed container status
// 6. Verify for Memory Used, it should be 1024
// 7. Send AM heatbeat to RM. Allocated response should contain completed
// container.
@Test(timeout = 60000)
public void testProcessingNMContainerStatusesOnNMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    int nmMemory = 8192;
    int amMemory = 1024;
    int containerMemory = 2048;
    MockNM nm1 = new MockNM("127.0.0.1:1234", nmMemory, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app0 = rm1.submitApp(amMemory);
    MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
    // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
    int noOfContainers = 1;
    List<Container> allocateContainers = am0.allocateAndWaitForContainers(noOfContainers, containerMemory, nm1);
    // 3. Verify for number of container allocated by RM
    Assert.assertEquals(noOfContainers, allocateContainers.size());
    Container container = allocateContainers.get(0);
    nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    nm1.nodeHeartbeat(am0.getApplicationAttemptId(), container.getId().getContainerId(), ContainerState.RUNNING);
    rm1.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
    // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
    // requested memory. 1024 + 2048=3072
    ResourceScheduler rs = rm1.getRMContext().getScheduler();
    long allocatedMB = rs.getRootQueueMetrics().getAllocatedMB();
    Assert.assertEquals(amMemory + containerMemory, allocatedMB);
    // 5. Re-register NM by sending completed container status
    List<NMContainerStatus> nMContainerStatusForApp = createNMContainerStatusForApp(am0);
    nm1.registerNode(nMContainerStatusForApp, Arrays.asList(app0.getApplicationId()));
    waitForClusterMemory(nm1, rs, amMemory);
    // 6. Verify for Memory Used, it should be 1024
    Assert.assertEquals(amMemory, rs.getRootQueueMetrics().getAllocatedMB());
    // 7. Send AM heatbeat to RM. Allocated response should contain completed
    // container
    AllocateRequest req = AllocateRequest.newInstance(0, 0F, new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>(), null);
    AllocateResponse allocate = am0.allocate(req);
    List<ContainerStatus> completedContainersStatuses = allocate.getCompletedContainersStatuses();
    Assert.assertEquals(noOfContainers, completedContainersStatuses.size());
    // Application clean up should happen Cluster memory used is 0
    nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    waitForClusterMemory(nm1, rs, 0);
    rm1.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 10 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestApplicationLifetimeMonitor method testApplicationLifetimeOnRMRestart.

@Test(timeout = 180000)
public void testApplicationLifetimeOnRMRestart() throws Exception {
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm1.nodeHeartbeat(true);
    long appLifetime = 60L;
    Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
    timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime);
    RMApp app1 = rm1.submitApp(200, Priority.newInstance(0), timeouts);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // Re-start RM
    MockRM rm2 = new MockRM(conf, memStore);
    // make sure app has been unregistered with old RM else both will trigger
    // Expire event
    rm1.getRMContext().getRMAppLifetimeMonitor().unregisterApp(app1.getApplicationId(), ApplicationTimeoutType.LIFETIME);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // recover app
    RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    nm1.registerNode(Arrays.asList(amContainer, runningContainer), null);
    // Wait for RM to settle down on recovering containers;
    TestWorkPreservingRMRestart.waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
    assertTrue(launchedContainers.contains(amContainer.getContainerId()));
    assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
    // check RMContainers are re-recreated and the container state is correct.
    rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
    // re register attempt to rm2
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.ACCEPTED);
    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    am1.registerAppAttempt();
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.RUNNING);
    // wait for app life time and application to be in killed state.
    rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.KILLED);
    Assert.assertTrue("Application killed before lifetime value", recoveredApp1.getFinishTime() > (recoveredApp1.getSubmitTime() + appLifetime * 1000));
}
Also used : HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationTimeoutType(org.apache.hadoop.yarn.api.records.ApplicationTimeoutType) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Aggregations

MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)84 Test (org.junit.Test)81 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)68 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)27 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)23 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)22 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)21 RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)20 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)16 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)16 IOException (java.io.IOException)15 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)14 Configuration (org.apache.hadoop.conf.Configuration)12 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)10 ArrayList (java.util.ArrayList)9 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)9