Search in sources :

Example 6 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class TestWorkPreservingRMRestart method checkCSQueue.

/**
 * Verifies the state of the capacity-scheduler leaf queue that {@code app}
 * belongs to: first delegates to {@code checkCSLeafQueue}, then checks the
 * current attempt's headroom, the queue metrics and the per-user metrics.
 *
 * <p>The expected available resource is {@code queueResource - usedResource}.
 * The count arguments handed to {@code assertMetrics} are the fixed values
 * {@code 1, 0, 1, 0, 2}; {@code numContainers} is only forwarded to
 * {@code checkCSLeafQueue}.
 *
 * @param rm              the resource manager under test
 * @param app             the scheduler application whose queue is inspected
 * @param clusterResource total cluster resource, forwarded to the leaf-queue check
 * @param queueResource   resource assigned to the queue
 * @param usedResource    resource expected to be in use in the queue
 * @param numContainers   container count forwarded to the leaf-queue check
 * @throws Exception if any delegated check fails with an exception
 */
private void checkCSQueue(MockRM rm, SchedulerApplication<SchedulerApplicationAttempt> app, Resource clusterResource, Resource queueResource, Resource usedResource, int numContainers) throws Exception {
    checkCSLeafQueue(rm, app, clusterResource, queueResource, usedResource, numContainers);

    final LeafQueue leaf = (LeafQueue) app.getQueue();
    final Resource expectedAvailable = Resources.subtract(queueResource, usedResource);

    // Headroom of the current attempt must equal what is left in the queue.
    assertEquals(expectedAvailable, app.getCurrentAppAttempt().getHeadroom());

    // The same expected numbers are used for both the queue and the user metrics.
    final QueueMetrics metricsOfQueue = leaf.getMetrics();
    final long availableMemory = expectedAvailable.getMemorySize();
    final int availableCores = expectedAvailable.getVirtualCores();
    final long usedMemory = usedResource.getMemorySize();
    final int usedCores = usedResource.getVirtualCores();

    // Queue-level metrics.
    assertMetrics(metricsOfQueue, 1, 0, 1, 0, 2, availableMemory, availableCores, usedMemory, usedCores);

    // Metrics of the user that owns the application.
    final QueueMetrics metricsOfUser = metricsOfQueue.getUserMetrics(app.getUser());
    assertMetrics(metricsOfUser, 1, 0, 1, 0, 2, availableMemory, availableCores, usedMemory, usedCores);
}
Also used : QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) Resource(org.apache.hadoop.yarn.api.records.Resource) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt)

Example 7 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class TestWorkPreservingRMRestart method testCapacitySchedulerRecovery.

// Test CS recovery with multi-level queues and multi-users:
// 1. setup 2 NMs each with 8GB memory;
// 2. setup 2 level queues: Default -> (QueueA, QueueB)
// 3. User1 submits 2 apps on QueueA
// 4. User2 submits 1 app  on QueueB
// 5. AM and each container has 1GB memory
// 6. Restart RM.
// 7. nm1 re-syncs back the containers belonging to user1.
// 8. nm2 re-syncs back the containers belonging to user2.
// 9. Assert the parent queue and 2 leaf queues state and the metrics.
// 10. Assert each user's consumption inside the queue.
@Test(timeout = 30000)
public void testCapacitySchedulerRecovery() throws Exception {
    // This scenario only applies to the capacity scheduler; for any other
    // scheduler type the test returns immediately without asserting anything.
    if (getSchedulerType() != SchedulerType.CAPACITY) {
        return;
    }
    // Enable per-user metrics and use the dominant resource calculator.
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfiguration(csConf);
    // In-memory state store; the same instance is handed to rm1 and later rm2.
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(csConf);
    rm1 = new MockRM(csConf, memStore);
    rm1.start();
    // Two mock node managers, each created with 8192 as the memory argument.
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    // USER_1 submits two apps to queue A, USER_2 submits one app to queue B.
    // Each AM is launched and registered right after its app is submitted.
    RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A);
    MockAM am1_1 = MockRM.launchAndRegisterAM(app1_1, rm1, nm1);
    RMApp app1_2 = rm1.submitApp(1024, "app1_2", USER_1, null, A);
    MockAM am1_2 = MockRM.launchAndRegisterAM(app1_2, rm1, nm2);
    RMApp app2 = rm1.submitApp(1024, "app2", USER_2, null, B);
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // clear queue metrics
    rm1.clearQueueMetrics(app1_1);
    rm1.clearQueueMetrics(app1_2);
    rm1.clearQueueMetrics(app2);
    // Mark QueueB as STOPPED in the configuration rm2 is started with.
    // NOTE(review): presumably this exercises recovery into a stopped queue;
    // confirm against the test's history.
    csConf.set(PREFIX + "root.Default.QueueB.state", "STOPPED");
    // Re-start RM
    rm2 = new MockRM(csConf, memStore);
    rm2.start();
    // Point both node managers at the new RM before they re-register.
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());
    // nm1 reports the containers of both USER_1 attempts; nm2 reports the
    // containers of the USER_2 attempt.
    List<NMContainerStatus> am1_1Containers = createNMContainerStatusForApp(am1_1);
    List<NMContainerStatus> am1_2Containers = createNMContainerStatusForApp(am1_2);
    am1_1Containers.addAll(am1_2Containers);
    nm1.registerNode(am1_1Containers, null);
    List<NMContainerStatus> am2Containers = createNMContainerStatusForApp(am2);
    nm2.registerNode(am2Containers, null);
    // Wait for RM to settle down on recovering containers;
    // two containers are expected per application attempt.
    waitForNumContainersToRecover(2, rm2, am1_1.getApplicationAttemptId());
    waitForNumContainersToRecover(2, rm2, am1_2.getApplicationAttemptId());
    waitForNumContainersToRecover(2, rm2, am2.getApplicationAttemptId());
    // Calculate each queue's resource usage.
    // Expected values: every container is (1024, 1); the cluster is twice one
    // NM; each leaf queue is given half of the cluster; queue A (q1) holds
    // 4 containers and queue B (q2) holds 2.
    Resource containerResource = Resource.newInstance(1024, 1);
    Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
    Resource clusterResource = Resources.multiply(nmResource, 2);
    Resource q1Resource = Resources.multiply(clusterResource, 0.5);
    Resource q2Resource = Resources.multiply(clusterResource, 0.5);
    Resource q1UsedResource = Resources.multiply(containerResource, 4);
    Resource q2UsedResource = Resources.multiply(containerResource, 2);
    Resource totalUsedResource = Resources.add(q1UsedResource, q2UsedResource);
    Resource q1availableResources = Resources.subtract(q1Resource, q1UsedResource);
    Resource q2availableResources = Resources.subtract(q2Resource, q2UsedResource);
    Resource totalAvailableResource = Resources.add(q1availableResources, q2availableResources);
    // Look up the recovered scheduler-side applications on rm2.
    Map<ApplicationId, SchedulerApplication> schedulerApps = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication schedulerApp1_1 = schedulerApps.get(app1_1.getApplicationId());
    // assert queue A state.
    checkCSLeafQueue(rm2, schedulerApp1_1, clusterResource, q1Resource, q1UsedResource, 4);
    QueueMetrics queue1Metrics = schedulerApp1_1.getQueue().getMetrics();
    assertMetrics(queue1Metrics, 2, 0, 2, 0, 4, q1availableResources.getMemorySize(), q1availableResources.getVirtualCores(), q1UsedResource.getMemorySize(), q1UsedResource.getVirtualCores());
    // assert queue B state.
    SchedulerApplication schedulerApp2 = schedulerApps.get(app2.getApplicationId());
    checkCSLeafQueue(rm2, schedulerApp2, clusterResource, q2Resource, q2UsedResource, 2);
    QueueMetrics queue2Metrics = schedulerApp2.getQueue().getMetrics();
    assertMetrics(queue2Metrics, 1, 0, 1, 0, 2, q2availableResources.getMemorySize(), q2availableResources.getVirtualCores(), q2UsedResource.getMemorySize(), q2UsedResource.getVirtualCores());
    // assert parent queue state.
    // The parent is reached through queue B's leaf queue and is expected to
    // aggregate both leaves: 6 containers and the summed used/available
    // resources.
    // NOTE(review): 6 / 16 presumably is 6 used GB over the 16 GB cluster;
    // confirm against checkParentQueue's parameters.
    LeafQueue leafQueue = (LeafQueue) schedulerApp2.getQueue();
    ParentQueue parentQueue = (ParentQueue) leafQueue.getParent();
    checkParentQueue(parentQueue, 6, totalUsedResource, (float) 6 / 16, (float) 6 / 16);
    assertMetrics(parentQueue.getMetrics(), 3, 0, 3, 0, 6, totalAvailableResource.getMemorySize(), totalAvailableResource.getVirtualCores(), totalUsedResource.getMemorySize(), totalUsedResource.getVirtualCores());
}
Also used : FSParentQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSParentQueue) ParentQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) Resource(org.apache.hadoop.yarn.api.records.Resource) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Test(org.junit.Test)

Example 8 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class FiCaSchedulerApp method getActivedAppDiagnosticMessage.

/**
 * Appends diagnostic details for an activated application to
 * {@code diagnosticMessage}: the AM partition (the default-partition label
 * when the AM partition name is empty), the partition's resource, and the
 * queue's absolute, absolute-used and absolute-maximum capacities for that
 * partition, each multiplied by 100 and followed by {@code " % ; "}.
 *
 * @param diagnosticMessage builder the details are appended to
 */
protected void getActivedAppDiagnosticMessage(StringBuilder diagnosticMessage) {
    final QueueCapacities capacities = getCSLeafQueue().getQueueCapacities();

    diagnosticMessage
        .append(" Details : AM Partition = ")
        .append(appAMNodePartitionName.isEmpty() ? NodeLabel.DEFAULT_NODE_LABEL_PARTITION : appAMNodePartitionName)
        .append(" ; ")
        .append("Partition Resource = ")
        .append(rmContext.getNodeLabelManager().getResourceByLabel(appAMNodePartitionName, Resources.none()))
        .append(" ; ")
        .append("Queue's Absolute capacity = ")
        .append(capacities.getAbsoluteCapacity(appAMNodePartitionName) * 100)
        .append(" % ; ")
        .append("Queue's Absolute used capacity = ")
        .append(capacities.getAbsoluteUsedCapacity(appAMNodePartitionName) * 100)
        .append(" % ; ")
        .append("Queue's Absolute max capacity = ")
        .append(capacities.getAbsoluteMaximumCapacity(appAMNodePartitionName) * 100)
        .append(" % ; ");
}
Also used : QueueCapacities(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)

Example 9 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class CapacitySchedulerInfo method getQueues.

/**
 * Builds the queue-info list for the children of {@code parent}, recursing
 * into every child that is not a {@link LeafQueue}.
 *
 * <p>All leaf queues are emitted before all non-leaf queues; within each
 * group the order returned by {@code parent.getChildQueues()} is kept.
 *
 * @param parent queue whose direct children are described
 * @return info list with leaf queues first, then non-leaf queues
 */
protected CapacitySchedulerQueueInfoList getQueues(CSQueue parent) {
    final CapacitySchedulerQueueInfoList result = new CapacitySchedulerQueueInfoList();

    // JAXB marshalling can make the "type" field injected for JSON switch
    // between string and array depending on print order. Marshalling every
    // leaf queue ahead of the non-leaf queues avoids that; see YARN-4785.
    final List<CSQueue> leaves = new ArrayList<>();
    final List<CSQueue> nonLeaves = new ArrayList<>();
    for (CSQueue child : parent.getChildQueues()) {
        (child instanceof LeafQueue ? leaves : nonLeaves).add(child);
    }

    // Leaf queues first: they have no nested queues to descend into.
    for (CSQueue leaf : leaves) {
        result.addToQueueInfoList(new CapacitySchedulerLeafQueueInfo((LeafQueue) leaf));
    }

    // Then the non-leaf queues, each carrying the info of its own children.
    for (CSQueue inner : nonLeaves) {
        CapacitySchedulerQueueInfo innerInfo = new CapacitySchedulerQueueInfo(inner);
        innerInfo.queues = getQueues(inner);
        result.addToQueueInfoList(innerInfo);
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) CSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue)

Example 10 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class FiCaSchedulerApp method getPendingAppDiagnosticMessage.

/**
 * Appends diagnostic details for a pending application to
 * {@code diagnosticMessage}: the AM partition (the default-partition label
 * when the AM partition name is empty), the AM resource request, the queue's
 * AM resource limit and user AM resource limit for that partition, and the
 * queue's AM resource usage for that partition. Entries are separated by
 * {@code "; "}.
 *
 * @param diagnosticMessage builder the details are appended to
 */
protected void getPendingAppDiagnosticMessage(StringBuilder diagnosticMessage) {
    final LeafQueue leaf = getCSLeafQueue();

    diagnosticMessage
        .append(" Details : AM Partition = ")
        .append(appAMNodePartitionName.isEmpty() ? NodeLabel.DEFAULT_NODE_LABEL_PARTITION : appAMNodePartitionName)
        .append("; ")
        .append("AM Resource Request = ")
        .append(getAMResource(appAMNodePartitionName))
        .append("; ")
        .append("Queue Resource Limit for AM = ")
        .append(leaf.getAMResourceLimitPerPartition(appAMNodePartitionName))
        .append("; ")
        .append("User AM Resource Limit of the queue = ")
        .append(leaf.getUserAMResourceLimitPerPartition(appAMNodePartitionName))
        .append("; ")
        .append("Queue AM Resource Usage = ")
        .append(leaf.getQueueResourceUsage().getAMUsed(appAMNodePartitionName))
        .append("; ");
}
Also used : LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)

Aggregations

LeafQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)17 Resource (org.apache.hadoop.yarn.api.records.Resource)9 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)7 ArrayList (java.util.ArrayList)5 Matchers.anyString (org.mockito.Matchers.anyString)5 TreeSet (java.util.TreeSet)4 HashMap (java.util.HashMap)3 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)3 ResourceUsage (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage)3 CSQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue)3 ParentQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue)3 DominantResourceCalculator (org.apache.hadoop.yarn.util.resource.DominantResourceCalculator)3 Test (org.junit.Test)3 InvocationOnMock (org.mockito.invocation.InvocationOnMock)3 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)2 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)2 CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)2 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)2