Search in sources :

Example 36 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class FifoScheduler method removeNode.

private synchronized void removeNode(RMNode nodeInfo) {
    FiCaSchedulerNode node = nodeTracker.getNode(nodeInfo.getNodeID());
    if (node == null) {
        return;
    }
    // Kill running containers
    for (RMContainer container : node.getCopiedListOfRunningContainers()) {
        super.completedContainer(container, SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
    }
    nodeTracker.removeNode(nodeInfo.getNodeID());
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 37 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class FifoScheduler method addNode.

private synchronized void addNode(RMNode nodeManager) {
    FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager, usePortForNodeName);
    nodeTracker.addNode(schedulerNode);
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)

Example 38 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class FifoScheduler method nodeUpdate.

@Override
protected synchronized void nodeUpdate(RMNode nm) {
    super.nodeUpdate(nm);
    FiCaSchedulerNode node = (FiCaSchedulerNode) getNode(nm.getNodeID());
    if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext.isSchedulerReadyForAllocatingContainers()) {
        return;
    }
    if (Resources.greaterThanOrEqual(resourceCalculator, getClusterResource(), node.getUnallocatedResource(), minimumAllocation)) {
        LOG.debug("Node heartbeat " + nm.getNodeID() + " available resource = " + node.getUnallocatedResource());
        assignContainers(node);
        LOG.debug("Node after allocation " + nm.getNodeID() + " resource = " + node.getUnallocatedResource());
    }
    updateAvailableResourcesMetrics();
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)

Example 39 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class FifoScheduler method completedContainerInternal.

@Lock(FifoScheduler.class)
@Override
protected synchronized void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
    // Get the application for the finished container
    Container container = rmContainer.getContainer();
    FifoAppAttempt application = getCurrentAttemptForContainer(container.getId());
    ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId();
    // Get the node on which the container was allocated
    FiCaSchedulerNode node = (FiCaSchedulerNode) getNode(container.getNodeId());
    if (application == null) {
        LOG.info("Unknown application: " + appId + " released container " + container.getId() + " on node: " + node + " with event: " + event);
        return;
    }
    // Inform the application
    application.containerCompleted(rmContainer, containerStatus, event, RMNodeLabelsManager.NO_LABEL);
    // Inform the node
    node.releaseContainer(rmContainer.getContainerId(), false);
    // Update total usage
    Resources.subtractFrom(usedResource, container.getResource());
    LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event);
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Lock(org.apache.hadoop.yarn.server.utils.Lock)

Example 40 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class TestReservations method testAssignToQueue.

@Test
public void testAssignToQueue() throws Exception {
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
    setup(csConf);
    // Manipulate queue 'a'
    LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
    // Users
    final String user_0 = "user_0";
    // Submit applications
    final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
    FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    app_0 = spy(app_0);
    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
    rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
    a.submitApplicationAttempt(app_0, user_0);
    final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
    FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    app_1 = spy(app_1);
    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
    a.submitApplicationAttempt(app_1, user_0);
    // Setup some nodes
    String host_0 = "host_0";
    FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
    String host_1 = "host_1";
    FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
    String host_2 = "host_2";
    FiCaSchedulerNode node_2 = TestUtils.getMockNode(host_2, DEFAULT_RACK, 0, 8 * GB);
    when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
    when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
    when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
    Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1, node_2.getNodeID(), node_2);
    final int numNodes = 2;
    Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
    // Setup resource-requests
    Priority priorityAM = TestUtils.createMockPriority(1);
    Priority priorityMap = TestUtils.createMockPriority(5);
    Priority priorityReduce = TestUtils.createMockPriority(10);
    app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 1, true, priorityAM, recordFactory)));
    app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 5 * GB, 2, true, priorityReduce, recordFactory)));
    app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 3 * GB, 2, true, priorityMap, recordFactory)));
    // Start testing...
    // Only AM
    TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
    assertEquals(2 * GB, a.getUsedResources().getMemorySize());
    assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
    assertEquals(14 * GB, a.getMetrics().getAvailableMB());
    assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
    // Only 1 map - simulating reduce
    TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
    assertEquals(5 * GB, a.getUsedResources().getMemorySize());
    assertEquals(5 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(5 * GB, a.getMetrics().getAllocatedMB());
    assertEquals(11 * GB, a.getMetrics().getAvailableMB());
    assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
    // Only 1 map to other node - simulating reduce
    TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
    assertEquals(8 * GB, a.getUsedResources().getMemorySize());
    assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(0 * GB, a.getMetrics().getReservedMB());
    assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
    assertEquals(8 * GB, a.getMetrics().getAvailableMB());
    assertEquals(null, node_0.getReservedContainer());
    assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
    // now add in reservations and make sure it continues if config set
    // allocate to queue so that the potential new capacity is greater then
    // absoluteMaxCapacity
    TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
    assertEquals(13 * GB, a.getUsedResources().getMemorySize());
    assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
    assertEquals(5 * GB, a.getMetrics().getReservedMB());
    assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
    assertEquals(3 * GB, a.getMetrics().getAvailableMB());
    assertEquals(3 * GB, app_0.getHeadroom().getMemorySize());
    assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
    assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
    ResourceLimits limits = new ResourceLimits(Resources.createResource(13 * GB));
    boolean res = a.canAssignToThisQueue(Resources.createResource(13 * GB), RMNodeLabelsManager.NO_LABEL, limits, Resources.createResource(3 * GB), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    assertTrue(res);
    // 16GB total, 13GB consumed (8 allocated, 5 reserved). asking for 5GB so we would have to
    // unreserve 2GB to get the total 5GB needed.
    // also note vcore checks not enabled
    assertEquals(0, limits.getHeadroom().getMemorySize());
    refreshQueuesTurnOffReservationsContLook(a, csConf);
    // should return false since reservations continue look is off.
    limits = new ResourceLimits(Resources.createResource(13 * GB));
    res = a.canAssignToThisQueue(Resources.createResource(13 * GB), RMNodeLabelsManager.NO_LABEL, limits, Resources.createResource(3 * GB), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    assertFalse(res);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) AMState(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ActiveUsersManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager) Test(org.junit.Test)

Aggregations

FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)79 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)47 Resource (org.apache.hadoop.yarn.api.records.Resource)46 Test (org.junit.Test)39 ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits)37 NodeId (org.apache.hadoop.yarn.api.records.NodeId)35 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)34 Priority (org.apache.hadoop.yarn.api.records.Priority)34 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)21 ActiveUsersManager (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager)20 ArrayList (java.util.ArrayList)14 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)11 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)10 SchedulerRequestKey (org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey)9 HashMap (java.util.HashMap)8 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)7 AMState (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState)7 Container (org.apache.hadoop.yarn.api.records.Container)6 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)5 RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext)5