Search in sources :

Example 21 with NMContainerStatus

use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.

the class TestRMRestart method testSynchronouslyRenewDTOnRecovery.

// Test Delegation token is renewed synchronously so that recover events
// can be processed before any other external incoming events, specifically
// the ContainerFinished event on NM re-registraton.
@Test(timeout = 20000)
public void testSynchronouslyRenewDTOnRecovery() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    final MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app0 = rm1.submitApp(200);
    final MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
    MockRM rm2 = new MockRM(conf, memStore) {

        @Override
        protected ResourceTrackerService createResourceTrackerService() {
            return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {

                @Override
                protected void serviceStart() throws Exception {
                    // send the container_finished event as soon as the
                    // ResourceTrackerService is started.
                    super.serviceStart();
                    nm1.setResourceTrackerService(getResourceTrackerService());
                    NMContainerStatus status = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
                    nm1.registerNode(Arrays.asList(status), null);
                }
            };
        }
    };
    try {
        // Re-start RM
        rm2.start();
        // wait for the 2nd attempt to be started.
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
        int timeoutSecs = 0;
        while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
            Thread.sleep(200);
        }
        MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
        MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
    } finally {
        rm2.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) Test(org.junit.Test)

Example 22 with NMContainerStatus

use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.

the class TestRMRestart method createNMContainerStatus.

public static NMContainerStatus createNMContainerStatus(ApplicationAttemptId appAttemptId, int id, ContainerState containerState, String nodeLabelExpression) {
    ContainerId containerId = ContainerId.newContainerId(appAttemptId, id);
    NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, 0, containerState, Resource.newInstance(1024, 1), "recover container", 0, Priority.newInstance(0), 0, nodeLabelExpression);
    return containerReport;
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)

Example 23 with NMContainerStatus

use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.

the class TestResourceTrackerService method testHandleContainerStatusInvalidCompletions.

@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testHandleContainerStatusInvalidCompletions() throws Exception {
    rm = new MockRM(new YarnConfiguration());
    rm.start();
    EventHandler handler = spy(rm.getRMContext().getDispatcher().getEventHandler());
    // Case 1: Unmanaged AM
    RMApp app = rm.submitApp(1024, true);
    // Case 1.1: AppAttemptId is null
    NMContainerStatus report = NMContainerStatus.newInstance(ContainerId.newContainerId(ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), 0, ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234);
    rm.getResourceTrackerService().handleNMContainerStatus(report, null);
    verify(handler, never()).handle((Event) any());
    // Case 1.2: Master container is null
    RMAppAttemptImpl currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt();
    currentAttempt.setMasterContainer(null);
    report = NMContainerStatus.newInstance(ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), 0, ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234);
    rm.getResourceTrackerService().handleNMContainerStatus(report, null);
    verify(handler, never()).handle((Event) any());
    // Case 2: Managed AM
    app = rm.submitApp(1024);
    // Case 2.1: AppAttemptId is null
    report = NMContainerStatus.newInstance(ContainerId.newContainerId(ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), 0, ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234);
    try {
        rm.getResourceTrackerService().handleNMContainerStatus(report, null);
    } catch (Exception e) {
    // expected - ignore
    }
    verify(handler, never()).handle((Event) any());
    // Case 2.2: Master container is null
    currentAttempt = (RMAppAttemptImpl) app.getCurrentAppAttempt();
    currentAttempt.setMasterContainer(null);
    report = NMContainerStatus.newInstance(ContainerId.newContainerId(currentAttempt.getAppAttemptId(), 0), 0, ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234);
    try {
        rm.getResourceTrackerService().handleNMContainerStatus(report, null);
    } catch (Exception e) {
    // expected - ignore
    }
    verify(handler, never()).handle((Event) any());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMAppAttemptImpl(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl) EventHandler(org.apache.hadoop.yarn.event.EventHandler) IOException(java.io.IOException) Test(org.junit.Test)

Example 24 with NMContainerStatus

use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.

the class TestWorkPreservingRMRestartForNodeLabel method testWorkPreservingRestartForNodeLabel.

@Test
public void testWorkPreservingRestartForNodeLabel() throws Exception {
    // This test is pretty much similar to testContainerAllocateWithLabel.
    // Difference is, this test doesn't specify label expression in ResourceRequest,
    // instead, it uses default queue label expression
    // set node -> label
    mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x"), NodeId.newInstance("h2", 0), toSet("y")));
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    conf = TestUtils.getConfigurationWithDefaultQueueLabels(conf);
    // inject node label manager
    MockRM rm1 = new MockRM(conf, memStore) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    // label = x
    MockNM nm1 = rm1.registerNode("h1:1234", 8000);
    // label = y
    MockNM nm2 = rm1.registerNode("h2:1234", 8000);
    // label = <empty>
    MockNM nm3 = rm1.registerNode("h3:1234", 8000);
    ContainerId containerId;
    // launch an app to queue a1 (label = x), and check all container will
    // be allocated in h1
    RMApp app1 = rm1.submitApp(200, "app", "user", null, "a1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // request a container.
    am1.allocate("*", 1024, 1, new ArrayList<ContainerId>());
    containerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    Assert.assertTrue(rm1.waitForState(nm1, containerId, RMContainerState.ALLOCATED));
    checkRMContainerLabelExpression(ContainerId.newContainerId(am1.getApplicationAttemptId(), 1), rm1, "x");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am1.getApplicationAttemptId(), 2), rm1, "x");
    // launch an app to queue b1 (label = y), and check all container will
    // be allocated in h2
    RMApp app2 = rm1.submitApp(200, "app", "user", null, "b1");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // request a container.
    am2.allocate("*", 1024, 1, new ArrayList<ContainerId>());
    containerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 2);
    Assert.assertTrue(rm1.waitForState(nm2, containerId, RMContainerState.ALLOCATED));
    checkRMContainerLabelExpression(ContainerId.newContainerId(am2.getApplicationAttemptId(), 1), rm1, "y");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am2.getApplicationAttemptId(), 2), rm1, "y");
    // launch an app to queue c1 (label = ""), and check all container will
    // be allocated in h3
    RMApp app3 = rm1.submitApp(200, "app", "user", null, "c1");
    MockAM am3 = MockRM.launchAndRegisterAM(app3, rm1, nm3);
    // request a container.
    am3.allocate("*", 1024, 1, new ArrayList<ContainerId>());
    containerId = ContainerId.newContainerId(am3.getApplicationAttemptId(), 2);
    Assert.assertTrue(rm1.waitForState(nm3, containerId, RMContainerState.ALLOCATED));
    checkRMContainerLabelExpression(ContainerId.newContainerId(am3.getApplicationAttemptId(), 1), rm1, "");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am3.getApplicationAttemptId(), 2), rm1, "");
    // Re-start RM
    mgr = new NullRMNodeLabelsManager();
    mgr.init(conf);
    mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x"), NodeId.newInstance("h2", 0), toSet("y")));
    MockRM rm2 = new MockRM(conf, memStore) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());
    nm3.setResourceTrackerService(rm2.getResourceTrackerService());
    // recover app
    NMContainerStatus app1c1 = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING, "x");
    NMContainerStatus app1c2 = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING, "x");
    nm1.registerNode(Arrays.asList(app1c1, app1c2), null);
    waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    checkRMContainerLabelExpression(ContainerId.newContainerId(am1.getApplicationAttemptId(), 1), rm1, "x");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am1.getApplicationAttemptId(), 2), rm1, "x");
    NMContainerStatus app2c1 = TestRMRestart.createNMContainerStatus(am2.getApplicationAttemptId(), 1, ContainerState.RUNNING, "y");
    NMContainerStatus app2c2 = TestRMRestart.createNMContainerStatus(am2.getApplicationAttemptId(), 2, ContainerState.RUNNING, "y");
    nm2.registerNode(Arrays.asList(app2c1, app2c2), null);
    waitForNumContainersToRecover(2, rm2, am2.getApplicationAttemptId());
    checkRMContainerLabelExpression(ContainerId.newContainerId(am2.getApplicationAttemptId(), 1), rm1, "y");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am2.getApplicationAttemptId(), 2), rm1, "y");
    NMContainerStatus app3c1 = TestRMRestart.createNMContainerStatus(am3.getApplicationAttemptId(), 1, ContainerState.RUNNING, "");
    NMContainerStatus app3c2 = TestRMRestart.createNMContainerStatus(am3.getApplicationAttemptId(), 2, ContainerState.RUNNING, "");
    nm3.registerNode(Arrays.asList(app3c1, app3c2), null);
    waitForNumContainersToRecover(2, rm2, am3.getApplicationAttemptId());
    checkRMContainerLabelExpression(ContainerId.newContainerId(am3.getApplicationAttemptId(), 1), rm1, "");
    checkRMContainerLabelExpression(ContainerId.newContainerId(am3.getApplicationAttemptId(), 2), rm1, "");
    // Check recovered resource usage
    checkAppResourceUsage("x", app1.getApplicationId(), rm1, 2 * GB);
    checkAppResourceUsage("y", app2.getApplicationId(), rm1, 2 * GB);
    checkAppResourceUsage("", app3.getApplicationId(), rm1, 2 * GB);
    checkQueueResourceUsage("x", "a1", rm1, 2 * GB);
    checkQueueResourceUsage("y", "b1", rm1, 2 * GB);
    checkQueueResourceUsage("", "c1", rm1, 2 * GB);
    checkQueueResourceUsage("x", "a", rm1, 2 * GB);
    checkQueueResourceUsage("y", "b", rm1, 2 * GB);
    checkQueueResourceUsage("", "c", rm1, 2 * GB);
    checkQueueResourceUsage("x", "root", rm1, 2 * GB);
    checkQueueResourceUsage("y", "root", rm1, 2 * GB);
    checkQueueResourceUsage("", "root", rm1, 2 * GB);
    rm1.close();
    rm2.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) NullRMNodeLabelsManager(org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager) Test(org.junit.Test)

Example 25 with NMContainerStatus

use of org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus in project hadoop by apache.

the class MockRM method waitForContainerToComplete.

public void waitForContainerToComplete(RMAppAttempt attempt, NMContainerStatus completedContainer) throws InterruptedException {
    drainEventsImplicitly();
    int timeWaiting = 0;
    while (timeWaiting < TIMEOUT_MS_FOR_CONTAINER_AND_NODE) {
        List<ContainerStatus> containers = attempt.getJustFinishedContainers();
        LOG.info("Received completed containers " + containers);
        for (ContainerStatus container : containers) {
            if (container.getContainerId().equals(completedContainer.getContainerId())) {
                return;
            }
        }
        Thread.sleep(WAIT_MS_PER_LOOP);
        timeWaiting += WAIT_MS_PER_LOOP;
    }
}
Also used : NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus)

Aggregations

NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)39 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)24 Test (org.junit.Test)24 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)22 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)11 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)11 ArrayList (java.util.ArrayList)10 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)10 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)9 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)8 Container (org.apache.hadoop.yarn.api.records.Container)8 Resource (org.apache.hadoop.yarn.api.records.Resource)7 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)6 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)6 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)6 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)4 NodeHeartbeatResponse (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse)4 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)4 RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)4