Search in sources :

Example 96 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class TestContainerResourceUsage method testUsageWithMultipleContainersAndRMRestart.

@Test(timeout = 120000)
public void testUsageWithMultipleContainersAndRMRestart() throws Exception {
    // Set max attempts to 1 so that when the first attempt fails, the app
    // won't try to start a new one.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm0 = new MockRM(conf, memStore);
    rm0.start();
    MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.getResourceTrackerService());
    nm.registerNode();
    RMApp app0 = rm0.submitApp(200);
    rm0.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
    RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
    ApplicationAttemptId attemptId0 = attempt0.getAppAttemptId();
    rm0.waitForState(attemptId0, RMAppAttemptState.SCHEDULED);
    nm.nodeHeartbeat(true);
    rm0.waitForState(attemptId0, RMAppAttemptState.ALLOCATED);
    MockAM am0 = rm0.sendAMLaunched(attempt0.getAppAttemptId());
    am0.registerAppAttempt();
    int NUM_CONTAINERS = 2;
    am0.allocate("127.0.0.1", 1000, NUM_CONTAINERS, new ArrayList<ContainerId>());
    nm.nodeHeartbeat(true);
    List<Container> conts = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() != NUM_CONTAINERS) {
        nm.nodeHeartbeat(true);
        conts.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    // launch the 2nd and 3rd containers.
    for (Container c : conts) {
        nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.RUNNING);
        rm0.waitForState(nm, c.getId(), RMContainerState.RUNNING);
    }
    // Get the RMContainers for all of the live containers, to be used later
    // for metrics calculations and comparisons.
    Collection<RMContainer> rmContainers = rm0.scheduler.getSchedulerAppInfo(attempt0.getAppAttemptId()).getLiveContainers();
    // Allow metrics to accumulate.
    int sleepInterval = 1000;
    int cumulativeSleepTime = 0;
    while (app0.getRMAppMetrics().getMemorySeconds() <= 0 && cumulativeSleepTime < 5000) {
        Thread.sleep(sleepInterval);
        cumulativeSleepTime += sleepInterval;
    }
    // Stop all non-AM containers
    for (Container c : conts) {
        if (c.getId().getContainerId() == 1)
            continue;
        nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.COMPLETE);
        rm0.waitForState(nm, c.getId(), RMContainerState.COMPLETED);
    }
    // After all other containers have completed, manually complete the master
    // container in order to trigger a save to the state store of the resource
    // usage metrics. This will cause the attempt to fail, and, since the max
    // attempt retries is 1, the app will also fail. This is intentional so
    // that all containers will complete prior to saving.
    ContainerId cId = ContainerId.newContainerId(attempt0.getAppAttemptId(), 1);
    nm.nodeHeartbeat(attempt0.getAppAttemptId(), cId.getContainerId(), ContainerState.COMPLETE);
    rm0.waitForState(nm, cId, RMContainerState.COMPLETED);
    // Check that the container metrics match those from the app usage report.
    long memorySeconds = 0;
    long vcoreSeconds = 0;
    for (RMContainer c : rmContainers) {
        AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
        memorySeconds += ru.getMemorySeconds();
        vcoreSeconds += ru.getVcoreSeconds();
    }
    RMAppMetrics metricsBefore = app0.getRMAppMetrics();
    Assert.assertEquals("Unexpected MemorySeconds value", memorySeconds, metricsBefore.getMemorySeconds());
    Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore.getVcoreSeconds());
    // create new RM to represent RM restart. Load up the state store.
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    RMApp app0After = rm1.getRMContext().getRMApps().get(app0.getApplicationId());
    // Compare container resource usage metrics from before and after restart.
    RMAppMetrics metricsAfter = app0After.getRMAppMetrics();
    Assert.assertEquals("Vcore seconds were not the same after RM Restart", metricsBefore.getVcoreSeconds(), metricsAfter.getVcoreSeconds());
    Assert.assertEquals("Memory seconds were not the same after RM Restart", metricsBefore.getMemorySeconds(), metricsAfter.getMemorySeconds());
    rm0.stop();
    rm0.close();
    rm1.stop();
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) AggregateAppResourceUsage(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage) Test(org.junit.Test)

Example 97 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class TestLeafQueue method testReservation.

@Test
public void testReservation() throws Exception {
    // Manipulate queue 'a'
    LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
    //unset maxCapacity
    a.setMaxCapacity(1.0f);
    // Users
    final String user_0 = "user_0";
    final String user_1 = "user_1";
    // Submit applications
    final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
    FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    a.submitApplicationAttempt(app_0, user_0);
    final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
    FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), spyRMContext);
    a.submitApplicationAttempt(app_1, user_1);
    // Setup some nodes
    String host_0 = "127.0.0.1";
    FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4 * GB);
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
    Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0);
    final int numNodes = 2;
    Resource clusterResource = Resources.createResource(numNodes * (4 * GB), numNodes * 16);
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
    // Setup resource-requests
    Priority priority = TestUtils.createMockPriority(1);
    app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority, recordFactory)));
    app_1.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 4 * GB, 1, true, priority, recordFactory)));
    // Start testing...
    // Only 1 container
    applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
    assertEquals(1 * GB, a.getUsedResources().getMemorySize());
    assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
    assertEquals(0 * GB, a.getMetrics().getAvailableMB());
    // Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also
    // you can get one container more than user-limit
    applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
    assertEquals(2 * GB, a.getUsedResources().getMemorySize());
    assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
    // Now, reservation should kick in for app_1
    applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
    assertEquals(6 * GB, a.getUsedResources().getMemorySize());
    assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
    assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
    assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(4 * GB, a.getMetrics().getReservedMB());
    assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
    // Now free 1 container from app_0 i.e. 1G
    RMContainer rmContainer = app_0.getLiveContainers().iterator().next();
    a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
    applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
    assertEquals(5 * GB, a.getUsedResources().getMemorySize());
    assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
    assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
    assertEquals(1 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(4 * GB, a.getMetrics().getReservedMB());
    assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
    // Now finish another container from app_0 and fulfill the reservation
    rmContainer = app_0.getLiveContainers().iterator().next();
    a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
    applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
    assertEquals(4 * GB, a.getUsedResources().getMemorySize());
    assertEquals(0 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(4 * GB, app_1.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, app_1.getCurrentReservation().getMemorySize());
    assertEquals(4 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(4 * GB, a.getMetrics().getAllocatedMB());
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ActiveUsersManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager) Test(org.junit.Test)

Example 98 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class TestNodeLabelContainerAllocation method checkTaskContainersHost.

private void checkTaskContainersHost(ApplicationAttemptId attemptId, ContainerId containerId, ResourceManager rm, String host) {
    YarnScheduler scheduler = rm.getRMContext().getScheduler();
    SchedulerAppReport appReport = scheduler.getSchedulerAppInfo(attemptId);
    Assert.assertTrue(appReport.getLiveContainers().size() > 0);
    for (RMContainer c : appReport.getLiveContainers()) {
        if (c.getContainerId().equals(containerId)) {
            Assert.assertEquals(host, c.getAllocatedNode().getHost());
        }
    }
}
Also used : YarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulerAppReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport)

Example 99 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class TestReservations method testGetAppToUnreserve.

@Test
public void testGetAppToUnreserve() throws Exception {
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
    setup(csConf);
    final String user_0 = "user_0";
    final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
    LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
    FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    String host_0 = "host_0";
    FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
    String host_1 = "host_1";
    FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
    Resource clusterResource = Resources.createResource(2 * 8 * GB);
    // Setup resource-requests
    Priority p = TestUtils.createMockPriority(5);
    SchedulerRequestKey priorityMap = toSchedulerKey(p);
    Resource capability = Resources.createResource(2 * GB, 0);
    RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
    SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
    RMContext rmContext = mock(RMContext.class);
    ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);
    DrainDispatcher drainDispatcher = new DrainDispatcher();
    when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
    when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
    when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
    when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
    when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(app_0.getApplicationId(), 1);
    ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
    Container container = TestUtils.getMockContainer(containerId, node_1.getNodeID(), Resources.createResource(2 * GB), priorityMap.getPriority());
    RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), appAttemptId, node_1.getNodeID(), "user", rmContext);
    Container container_1 = TestUtils.getMockContainer(containerId, node_0.getNodeID(), Resources.createResource(1 * GB), priorityMap.getPriority());
    RMContainer rmContainer_1 = new RMContainerImpl(container_1, SchedulerRequestKey.extractFrom(container_1), appAttemptId, node_0.getNodeID(), "user", rmContext);
    // no reserved containers
    NodeId unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
    assertEquals(null, unreserveId);
    // no reserved containers - reserve then unreserve
    app_0.reserve(node_0, priorityMap, rmContainer_1, container_1);
    app_0.unreserve(priorityMap, node_0, rmContainer_1);
    unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
    assertEquals(null, unreserveId);
    // no container large enough is reserved
    app_0.reserve(node_0, priorityMap, rmContainer_1, container_1);
    unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
    assertEquals(null, unreserveId);
    // reserve one that is now large enough
    app_0.reserve(node_1, priorityMap, rmContainer, container);
    unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
    assertEquals(node_1.getNodeID(), unreserveId);
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ContainerAllocationExpirer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) RMApplicationHistoryWriter(org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulerRequestKey(org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) RMContainerImpl(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl) SystemMetricsPublisher(org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ActiveUsersManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager) Test(org.junit.Test)

Example 100 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class TestWorkPreservingRMRestartForNodeLabel method checkRMContainerLabelExpression.

private void checkRMContainerLabelExpression(ContainerId containerId, MockRM rm, String labelExpression) {
    RMContainer container = rm.getRMContext().getScheduler().getRMContainer(containerId);
    Assert.assertNotNull("Cannot find RMContainer=" + containerId, container);
    Assert.assertEquals(labelExpression, container.getNodeLabelExpression());
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Aggregations

RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)166 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)55 Resource (org.apache.hadoop.yarn.api.records.Resource)49 Container (org.apache.hadoop.yarn.api.records.Container)48 Test (org.junit.Test)45 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)41 ArrayList (java.util.ArrayList)29 NodeId (org.apache.hadoop.yarn.api.records.NodeId)29 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)29 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)28 FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)21 RMContainerImpl (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl)18 HashMap (java.util.HashMap)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)17 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)15 Priority (org.apache.hadoop.yarn.api.records.Priority)14 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)13 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12