Search in sources :

Example 91 with FiCaSchedulerApp

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp in project hadoop by apache.

the class TestQueueState method testQueueStateTransit.

/**
 * Drives queues Q1 (parent) and Q2/Q3 (its children) through state changes by
 * editing the capacity-scheduler configuration and reinitializing the
 * scheduler, asserting the state of all three queues after every step.
 */
@Test(timeout = 15000)
public void testQueueStateTransit() throws Exception {
    // Queue layout: ROOT -> Q1 (100) -> { Q2 (50), Q3 (50) }.
    CapacitySchedulerConfiguration schedulerConf = new CapacitySchedulerConfiguration();
    schedulerConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { Q1 });
    schedulerConf.setQueues(Q1_PATH, new String[] { Q2, Q3 });
    schedulerConf.setCapacity(Q1_PATH, 100);
    schedulerConf.setCapacity(Q2_PATH, 50);
    schedulerConf.setCapacity(Q3_PATH, 50);
    conf = new YarnConfiguration(schedulerConf);

    cs = new CapacityScheduler();
    RMContext mockContext = TestUtils.getMockRMContext();
    cs.setConf(conf);
    cs.setRMContext(mockContext);
    cs.init(conf);

    // Without any explicit state in the configuration, all queues are RUNNING.
    assertStatesOfQ1Q2Q3(QueueState.RUNNING, QueueState.RUNNING, QueueState.RUNNING);

    // Submit one application, plus a mocked attempt for it, to Q2.
    String submitter = "testUser";
    ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    cs.getQueue(Q2).submitApplication(applicationId, submitter, Q2);
    FiCaSchedulerApp mockedAttempt = getMockApplication(applicationId, submitter, Resources.createResource(4, 0));
    cs.getQueue(Q2).submitApplicationAttempt(mockedAttempt, submitter);

    // Configure Q2 as STOPPED and reinitialize: Q2 is expected to be DRAINING,
    // the other two queues stay RUNNING.
    schedulerConf.setState(Q2_PATH, QueueState.STOPPED);
    conf = new YarnConfiguration(schedulerConf);
    cs.reinitialize(conf, mockContext);
    assertStatesOfQ1Q2Q3(QueueState.RUNNING, QueueState.DRAINING, QueueState.RUNNING);

    // Configure Q1 as STOPPED and reinitialize: Q1 and Q2 are expected to be
    // DRAINING, Q3 is expected to be STOPPED.
    schedulerConf.setState(Q1_PATH, QueueState.STOPPED);
    conf = new YarnConfiguration(schedulerConf);
    cs.reinitialize(conf, mockContext);
    assertStatesOfQ1Q2Q3(QueueState.DRAINING, QueueState.DRAINING, QueueState.STOPPED);

    // Configuring Q3 as RUNNING again must make reinitialize() throw.
    schedulerConf.setState(Q3_PATH, QueueState.RUNNING);
    conf = new YarnConfiguration(schedulerConf);
    boolean reinitializeRejected = false;
    try {
        cs.reinitialize(conf, mockContext);
    } catch (Exception ignored) {
        // This exception is the outcome the test is looking for.
        reinitializeRejected = true;
    }
    if (!reinitializeRejected) {
        Assert.fail("Should throw an Exception.");
    }

    // Finish the attempt and the application in Q2: all queues end up STOPPED.
    cs.getQueue(Q2).finishApplicationAttempt(mockedAttempt, Q2);
    cs.getQueue(Q2).finishApplication(applicationId, submitter);
    assertStatesOfQ1Q2Q3(QueueState.STOPPED, QueueState.STOPPED, QueueState.STOPPED);
}

/**
 * Asserts the current states of Q1, Q2 and Q3, in that order, looking each
 * queue up on the scheduler at the time of the check.
 */
private void assertStatesOfQ1Q2Q3(QueueState expectedQ1, QueueState expectedQ2, QueueState expectedQ3) throws Exception {
    Assert.assertEquals(expectedQ1, cs.getQueue(Q1).getState());
    Assert.assertEquals(expectedQ2, cs.getQueue(Q2).getState());
    Assert.assertEquals(expectedQ3, cs.getQueue(Q3).getState());
}
Also used : RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Example 92 with FiCaSchedulerApp

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp in project hadoop by apache.

the class TestQueueState method getMockApplication.

/**
 * Builds a Mockito mock of {@link FiCaSchedulerApp} for attempt 0 of the
 * given application.
 *
 * @param appId application the mocked attempt belongs to
 * @param user value returned by the mock's {@code getUser()}
 * @param amResource value returned by the mock's {@code getAMResource()},
 *        both with no argument and for the NO_LABEL partition
 * @return the stubbed mock
 */
private FiCaSchedulerApp getMockApplication(ApplicationId appId, String user, Resource amResource) {
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 0);
    FiCaSchedulerApp mockedApp = mock(FiCaSchedulerApp.class);

    // Identity of the attempt.
    when(mockedApp.getApplicationId()).thenReturn(attemptId.getApplicationId());
    when(mockedApp.getApplicationAttemptId()).thenReturn(attemptId);
    when(mockedApp.getUser()).thenReturn(user);

    // AM resource, priority 0 and the NO_LABEL partition.
    when(mockedApp.getAMResource()).thenReturn(amResource);
    when(mockedApp.getPriority()).thenReturn(Priority.newInstance(0));
    when(mockedApp.getAppAMNodePartitionName()).thenReturn(CommonNodeLabelsManager.NO_LABEL);
    when(mockedApp.getAMResource(CommonNodeLabelsManager.NO_LABEL)).thenReturn(amResource);

    // compareInputOrderTo() is the one method that runs its real implementation.
    when(mockedApp.compareInputOrderTo(any(FiCaSchedulerApp.class))).thenCallRealMethod();
    return mockedApp;
}
Also used : FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId)

Example 93 with FiCaSchedulerApp

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp in project hadoop by apache.

the class TestQueueStateManager method testQueueStateManager.

/**
 * Exercises the scheduler's QueueStateManager directly: stops and activates
 * Q1 (parent) and its children Q2/Q3, checking queue states and
 * {@code canDelete()} along the way, then stops Q1 again while an application
 * attempt is submitted to Q2.
 */
@Test
public void testQueueStateManager() throws AccessControlException, YarnException {
    // Queue layout: ROOT -> Q1 (100) -> { Q2 (50), Q3 (50) }.
    CapacitySchedulerConfiguration schedulerConf = new CapacitySchedulerConfiguration();
    schedulerConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { Q1 });
    schedulerConf.setQueues(Q1_PATH, new String[] { Q2, Q3 });
    schedulerConf.setCapacity(Q1_PATH, 100);
    schedulerConf.setCapacity(Q2_PATH, 50);
    schedulerConf.setCapacity(Q3_PATH, 50);
    conf = new YarnConfiguration(schedulerConf);

    cs = new CapacityScheduler();
    RMContext mockContext = TestUtils.getMockRMContext();
    cs.setConf(conf);
    cs.setRMContext(mockContext);
    cs.init(conf);
    @SuppressWarnings("rawtypes") QueueStateManager queueStates = cs.getCapacitySchedulerQueueManager().getQueueStateManager();

    // By default, all three queues are RUNNING.
    assertQueueInState(Q1, QueueState.RUNNING);
    assertQueueInState(Q2, QueueState.RUNNING);
    assertQueueInState(Q3, QueueState.RUNNING);

    // Stopping Q2 moves it to STOPPED.
    queueStates.stopQueue(Q2);
    assertQueueInState(Q2, QueueState.STOPPED);

    // Stopping Q1 moves Q1 and its remaining child Q3 to STOPPED.
    queueStates.stopQueue(Q1);
    assertQueueInState(Q1, QueueState.STOPPED);
    assertQueueInState(Q3, QueueState.STOPPED);

    // With everything stopped, every queue is reported as deletable.
    for (String queueName : new String[] { Q1, Q2, Q3 }) {
        Assert.assertTrue(queueStates.canDelete(queueName));
    }

    // The original note here read "Active Q2, it will fail", but no
    // activation is attempted; only Q2's STOPPED state is re-checked.
    assertQueueInState(Q2, QueueState.STOPPED);

    // Activating Q1 makes Q1 RUNNING while Q2 and Q3 remain STOPPED.
    queueStates.activateQueue(Q1);
    assertQueueInState(Q1, QueueState.RUNNING);
    assertQueueInState(Q2, QueueState.STOPPED);
    assertQueueInState(Q3, QueueState.STOPPED);

    // Activating both children makes them RUNNING as well.
    queueStates.activateQueue(Q2);
    queueStates.activateQueue(Q3);
    assertQueueInState(Q2, QueueState.RUNNING);
    assertQueueInState(Q3, QueueState.RUNNING);

    // With everything running, no queue is reported as deletable.
    for (String queueName : new String[] { Q1, Q2, Q3 }) {
        Assert.assertFalse(queueStates.canDelete(queueName));
    }

    // Submit one application, plus a mocked attempt for it, to Q2.
    String submitter = "testUser";
    ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    cs.getQueue(Q2).submitApplication(applicationId, submitter, Q2);
    FiCaSchedulerApp mockedAttempt = getMockApplication(applicationId, submitter, Resources.createResource(4, 0));
    cs.getQueue(Q2).submitApplicationAttempt(mockedAttempt, submitter);

    // Stopping Q1 now leaves Q1 and Q2 DRAINING, while Q3 goes to STOPPED.
    queueStates.stopQueue(Q1);
    assertQueueInState(Q1, QueueState.DRAINING);
    assertQueueInState(Q2, QueueState.DRAINING);
    assertQueueInState(Q3, QueueState.STOPPED);

    // Finishing the attempt and the application lets Q1 and Q2 reach STOPPED.
    cs.getQueue(Q2).finishApplicationAttempt(mockedAttempt, Q2);
    cs.getQueue(Q2).finishApplication(applicationId, submitter);
    assertQueueInState(Q1, QueueState.STOPPED);
    assertQueueInState(Q2, QueueState.STOPPED);
}

/**
 * Asserts the current state of a single queue, looking the queue up on the
 * scheduler at the time of the check.
 */
private void assertQueueInState(String queueName, QueueState expected) throws AccessControlException, YarnException {
    Assert.assertEquals(expected, cs.getQueue(queueName).getState());
}
Also used : RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) QueueStateManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueStateManager) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 94 with FiCaSchedulerApp

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp in project hadoop by apache.

the class TestWorkPreservingRMRestartForNodeLabel method checkAppResourceUsage.

/**
 * Asserts the memory used on a partition by an application's current attempt.
 *
 * @param partition partition whose usage is checked
 * @param appId application to look up in the scheduler
 * @param rm resource manager whose scheduler is cast to {@link CapacityScheduler}
 * @param expectedMemUsage expected value of the used memory size
 */
private void checkAppResourceUsage(String partition, ApplicationId appId, MockRM rm, int expectedMemUsage) {
    // Resolve the application's current attempt through the capacity scheduler.
    FiCaSchedulerApp currentAttempt = ((CapacityScheduler) rm.getResourceScheduler())
            .getSchedulerApplications()
            .get(appId)
            .getCurrentAppAttempt();
    Assert.assertEquals(
            expectedMemUsage,
            currentAttempt.getAppAttemptResourceUsage().getUsed(partition).getMemorySize());
}
Also used : FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)

Example 95 with FiCaSchedulerApp

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp in project hadoop by apache.

the class TestLeafQueue method testGetTotalPendingResourcesConsideringUserLimitOneUser.

/**
 * Checks getTotalPendingResourcesConsideringUserLimit() on queue 'e' for a
 * single user with two applications, as containers are assigned one at a time
 * and the user-limit-factor is raised part-way through.
 */
@Test
public void testGetTotalPendingResourcesConsideringUserLimitOneUser() throws Exception {
    // Queue 'e' is the queue under test.
    LeafQueue queueE = stubLeafQueue((LeafQueue) queues.get(E));
    // Let queue 'e' use 100% of cluster resources (max capacity).
    queueE.setMaxCapacity(1.0f);
    // Once used queue resources exceed capacity (1% here), the user resource
    // limit (used for headroom) is computed in small increments so that
    // user-limit-percent can be met for all users of the queue. Take
    // user-limit-percent out of the equation so the user resource limit is
    // always computed at its maximum possible value.
    queueE.setUserLimit(1000);

    // Two applications, both submitted by the same user.
    final String user_0 = "user_0";
    final ApplicationAttemptId firstAttemptId = TestUtils.getMockApplicationAttemptId(0, 0);
    FiCaSchedulerApp firstApp = new FiCaSchedulerApp(firstAttemptId, user_0, queueE, mock(ActiveUsersManager.class), spyRMContext);
    queueE.submitApplicationAttempt(firstApp, user_0);
    final ApplicationAttemptId secondAttemptId = TestUtils.getMockApplicationAttemptId(1, 0);
    FiCaSchedulerApp secondApp = new FiCaSchedulerApp(secondAttemptId, user_0, queueE, mock(ActiveUsersManager.class), spyRMContext);
    queueE.submitApplicationAttempt(secondApp, user_0);

    // A single node with 100GB of memory.
    String nodeHost = "127.0.0.1";
    FiCaSchedulerNode onlyNode = TestUtils.getMockNode(nodeHost, DEFAULT_RACK, 0, 100 * GB);
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(firstApp.getApplicationAttemptId(), firstApp, secondApp.getApplicationAttemptId(), secondApp);
    Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(onlyNode.getNodeID(), onlyNode);
    final int numNodes = 1;
    Resource clusterResource = Resources.createResource(numNodes * (100 * GB), numNodes * 128);
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);

    // Pending requests: 3 x 1GB for the first app, 2 x 1GB for the second (5GB total).
    Priority priority = TestUtils.createMockPriority(1);
    firstApp.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 3, true, priority, recordFactory)));
    secondApp.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority, recordFactory)));

    // Start testing...
    // Assign the 1st container of 1GB.
    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    // With queue capacity at 1% of 100GB and user-limit-factor at 1.0, all
    // users (only user_0) of queue 'e' should be able to consume 1GB. The first
    // container goes to the first app with no headroom left, even though
    // user_0's apps are still asking for a total of 4GB.
    assertEquals(1 * GB, firstApp.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, secondApp.getCurrentConsumption().getMemorySize());
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 0 * GB);

    // Assign the 2nd container of 1GB.
    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    // user_0 has no headroom due to the user-limit-factor of 1.0. However, the
    // capacity scheduler assigns one container more than user-limit-factor.
    // This container also went to the first app. Still no headroom, even
    // though both apps are asking for a cumulative 3GB.
    assertEquals(2 * GB, firstApp.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, secondApp.getCurrentConsumption().getMemorySize());
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 0 * GB);

    // A 3rd container cannot be allocated due to the user limit. Headroom still 0.
    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    assertEquals(2 * GB, firstApp.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, secondApp.getCurrentConsumption().getMemorySize());
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 0 * GB);

    // Raise the user-limit-factor so the limit goes from 1GB to 10GB
    // (1% * 10 * 100GB = 10GB). The two apps together still have 3GB pending,
    // so the user-limit-factor no longer caps the value returned by
    // getTotalPendingResourcesConsideringUserLimit().
    queueE.setUserLimitFactor(10.0f);
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 3 * GB);

    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    // The first app is now satisfied; the second app is still asking for 2GB.
    assertEquals(3 * GB, firstApp.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, secondApp.getCurrentConsumption().getMemorySize());
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 2 * GB);

    // Hand out the last 2 containers to the second app; nothing stays pending.
    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    assignOnceAndApplyForUserLimitTest(queueE, clusterResource, onlyNode, nodes, apps);
    assertEquals(3 * GB, firstApp.getCurrentConsumption().getMemorySize());
    assertEquals(2 * GB, secondApp.getCurrentConsumption().getMemorySize());
    assertPendingMemoryWithUserLimit(queueE, clusterResource, 0 * GB);

    // Release every live container of the first app.
    for (RMContainer liveContainer : firstApp.getLiveContainers()) {
        ContainerStatus killedStatus = ContainerStatus.newInstance(liveContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
        queueE.completedContainer(clusterResource, firstApp, onlyNode, liveContainer, killedStatus, RMContainerEventType.KILL, null, true);
    }
    // Release every live container of the second app.
    for (RMContainer liveContainer : secondApp.getLiveContainers()) {
        ContainerStatus killedStatus = ContainerStatus.newInstance(liveContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
        queueE.completedContainer(clusterResource, secondApp, onlyNode, liveContainer, killedStatus, RMContainerEventType.KILL, null, true);
    }
}

/**
 * Runs one assignContainers() pass of the queue against the node, using a
 * fresh ResourceLimits for the whole cluster and RESPECT_PARTITION_EXCLUSIVITY,
 * and hands the resulting assignment to applyCSAssignment().
 */
private void assignOnceAndApplyForUserLimitTest(LeafQueue queue, Resource clusterResource, FiCaSchedulerNode node, Map<NodeId, FiCaSchedulerNode> nodes, Map<ApplicationAttemptId, FiCaSchedulerApp> apps) throws Exception {
    applyCSAssignment(clusterResource, queue.assignContainers(clusterResource, node, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), queue, nodes, apps);
}

/**
 * Asserts the memory size reported by the queue's
 * getTotalPendingResourcesConsideringUserLimit() for the NO_LABEL partition.
 */
private void assertPendingMemoryWithUserLimit(LeafQueue queue, Resource clusterResource, long expectedMemory) throws Exception {
    assertEquals(expectedMemory, queue.getTotalPendingResourcesConsideringUserLimit(clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize());
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ActiveUsersManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager) Test(org.junit.Test)

Aggregations

- FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 141
- Test (org.junit.Test): 97
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 60
- Resource (org.apache.hadoop.yarn.api.records.Resource): 53
- MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM): 51
- ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 49
- MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM): 48
- MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM): 47
- FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 47
- ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 40
- Priority (org.apache.hadoop.yarn.api.records.Priority): 40
- RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 35
- ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 34
- NodeId (org.apache.hadoop.yarn.api.records.NodeId): 31
- NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent): 31
- RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode): 30
- ActiveUsersManager (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager): 24
- ArrayList (java.util.ArrayList): 19
- YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 19
- Container (org.apache.hadoop.yarn.api.records.Container): 13