Example 21 with QueueMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.

From class TestAlignedPlanner, method setup().

@Before
public void setup() throws Exception {
    // Pick a random seed and log it so a failing run can be reproduced
    long seed = rand.nextLong();
    rand.setSeed(seed);
    Log.getLog().info("Running with seed: " + seed);
    // Set cluster parameters
    long timeWindow = 1000000L;
    int capacityMem = 100 * 1024;
    int capacityCores = 100;
    step = 60000L;
    Resource clusterCapacity = Resource.newInstance(capacityMem, capacityCores);
    String reservationQ = ReservationSystemTestUtil.getFullReservationQueueName();
    float instConstraint = 100;
    float avgConstraint = 100;
    ReservationSchedulerConfiguration conf = ReservationSystemTestUtil.createConf(reservationQ, timeWindow, instConstraint, avgConstraint);
    CapacityOverTimePolicy policy = new CapacityOverTimePolicy();
    policy.init(reservationQ, conf);
    QueueMetrics queueMetrics = mock(QueueMetrics.class);
    RMContext context = ReservationSystemTestUtil.createMockRMContext();
    conf.setInt(AlignedPlannerWithGreedy.SMOOTHNESS_FACTOR, AlignedPlannerWithGreedy.DEFAULT_SMOOTHNESS_FACTOR);
    // Set planning agent
    agent = new AlignedPlannerWithGreedy();
    agent.init(conf);
    // Create Plan
    plan = new InMemoryPlan(queueMetrics, policy, agent, clusterCapacity, step, res, minAlloc, maxAlloc, "dedicated", null, true, context);
}
Also used:
- AlignedPlannerWithGreedy (org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.AlignedPlannerWithGreedy)
- InMemoryPlan (org.apache.hadoop.yarn.server.resourcemanager.reservation.InMemoryPlan)
- CapacityOverTimePolicy (org.apache.hadoop.yarn.server.resourcemanager.reservation.CapacityOverTimePolicy)
- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)
- RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext)
- Resource (org.apache.hadoop.yarn.api.records.Resource)
- ReservationSchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSchedulerConfiguration)
- Before (org.junit.Before)
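
The setup above references rand, step, agent, plan, res, minAlloc and maxAlloc without declaring them; they are fields of the test class that the snippet elides. A hedged sketch of plausible declarations (assumed for illustration, not the actual TestAlignedPlanner fields):

import java.util.Random;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.Plan;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.ReservationAgent;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;

// Hypothetical fixture fields assumed by setup() above; names match the snippet,
// values are illustrative and may differ from the real test class.
private Plan plan;
private ReservationAgent agent;
private long step;
private final Random rand = new Random();
// ResourceCalculator plus min/max allocation bounds passed to the InMemoryPlan constructor:
private final ResourceCalculator res = new DefaultResourceCalculator();
private final Resource minAlloc = Resource.newInstance(1024, 1);
private final Resource maxAlloc = Resource.newInstance(32 * 1024, 32);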

Example 22 with QueueMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.

From class TestOpportunisticContainerAllocatorAMService, method testContainerPromoteAfterContainerComplete().

@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // Both nodes are now eligible for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Verify Metrics
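    // Expected values: 2 NMs x 4096 MB = 8192 MB / 8 vcores total (a 4096 MB MockNM
    // registers 4 vcores); the AM holds 1024 MB / 1 vcore in 1 container,
    // leaving 7168 MB / 7 vcores available.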
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    // Verify that the container is actually running from the RM's point of view.
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Container Completed in the NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0)), true);
    Thread.sleep(200);
    // Verify that the container has been removed.
    rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertNull(rmContainer);
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send a promotion request; this should result in an update error
    // since the container no longer exists.
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(1, allocateResponse.getCompletedContainersStatuses().size());
    Assert.assertEquals(container.getId(), allocateResponse.getCompletedContainersStatuses().get(0).getContainerId());
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("INVALID_CONTAINER_ID", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Verify Metrics After OPP allocation (Nothing should change again)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
}
Also used:
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)
- NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)
- NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)
- HashMap (java.util.HashMap)
- ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)
- RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)
- DistributedSchedulingAllocateResponse (org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse)
- AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)
- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)
- RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)
- Container (org.apache.hadoop.yarn.api.records.Container)
- NodeId (org.apache.hadoop.yarn.api.records.NodeId)
- ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)
- OpportunisticContainerContext (org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext)
- RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)
- CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)
- Test (org.junit.Test)
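
The verifyMetrics helper called above is not part of the snippet. A minimal sketch, assuming its five trailing arguments are available MB, available vcores, allocated MB, allocated vcores and allocated containers (consistent with the 7168/7/1024/1/1 values used here):

// Hypothetical sketch of the verifyMetrics helper (argument order assumed).
private void verifyMetrics(QueueMetrics metrics, long availableMB,
        long availableVirtualCores, long allocatedMB,
        long allocatedVirtualCores, int allocatedContainers) {
    Assert.assertEquals(availableMB, metrics.getAvailableMB());
    Assert.assertEquals(availableVirtualCores, metrics.getAvailableVirtualCores());
    Assert.assertEquals(allocatedMB, metrics.getAllocatedMB());
    Assert.assertEquals(allocatedVirtualCores, metrics.getAllocatedVirtualCores());
    Assert.assertEquals(allocatedContainers, metrics.getAllocatedContainers());
}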

Example 23 with QueueMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.

From class TestRMWebServices, method verifyClusterMetrics().

public void verifyClusterMetrics(int submittedApps, int completedApps,
        int reservedMB, int availableMB, int allocMB, int reservedVirtualCores,
        int availableVirtualCores, int allocVirtualCores, int totalVirtualCores,
        int containersAlloc, int totalMB, int totalNodes, int lostNodes,
        int unhealthyNodes, int decommissionedNodes, int rebootedNodes,
        int activeNodes, int shutdownNodes) throws JSONException, Exception {
    ResourceScheduler rs = rm.getResourceScheduler();
    QueueMetrics metrics = rs.getRootQueueMetrics();
    ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();
    long totalMBExpect = metrics.getAvailableMB() + metrics.getAllocatedMB();
    long totalVirtualCoresExpect = metrics.getAvailableVirtualCores() + metrics.getAllocatedVirtualCores();
    assertEquals("appsSubmitted doesn't match", metrics.getAppsSubmitted(), submittedApps);
    assertEquals("appsCompleted doesn't match", metrics.getAppsCompleted(), completedApps);
    assertEquals("reservedMB doesn't match", metrics.getReservedMB(), reservedMB);
    assertEquals("availableMB doesn't match", metrics.getAvailableMB(), availableMB);
    assertEquals("allocatedMB doesn't match", metrics.getAllocatedMB(), allocMB);
    assertEquals("reservedVirtualCores doesn't match", metrics.getReservedVirtualCores(), reservedVirtualCores);
    assertEquals("availableVirtualCores doesn't match", metrics.getAvailableVirtualCores(), availableVirtualCores);
    assertEquals("allocatedVirtualCores doesn't match", totalVirtualCoresExpect, allocVirtualCores);
    assertEquals("containersAllocated doesn't match", 0, containersAlloc);
    assertEquals("totalMB doesn't match", totalMBExpect, totalMB);
    assertEquals("totalNodes doesn't match", clusterMetrics.getNumActiveNMs() + clusterMetrics.getNumLostNMs() + clusterMetrics.getNumDecommisionedNMs() + clusterMetrics.getNumRebootedNMs() + clusterMetrics.getUnhealthyNMs(), totalNodes);
    assertEquals("lostNodes doesn't match", clusterMetrics.getNumLostNMs(), lostNodes);
    assertEquals("unhealthyNodes doesn't match", clusterMetrics.getUnhealthyNMs(), unhealthyNodes);
    assertEquals("decommissionedNodes doesn't match", clusterMetrics.getNumDecommisionedNMs(), decommissionedNodes);
    assertEquals("rebootedNodes doesn't match", clusterMetrics.getNumRebootedNMs(), rebootedNodes);
    assertEquals("activeNodes doesn't match", clusterMetrics.getNumActiveNMs(), activeNodes);
    assertEquals("shutdownNodes doesn't match", clusterMetrics.getNumShutdownNMs(), shutdownNodes);
}
Also used:
- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)
- ClusterMetrics (org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics)
- ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)
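
For orientation, a hypothetical call site with self-consistent illustrative values (two 4 GB / 4 vcore NMs, one 1 GB / 1 vcore AM container running; the real tests pass values parsed from the cluster metrics REST response):

// Illustrative only: values chosen to satisfy the asserts above.
verifyClusterMetrics(
        1, 0,                 // submittedApps, completedApps
        0, 7168, 1024,        // reservedMB, availableMB, allocMB
        0, 7, 8, 8,           // reservedVirtualCores, availableVirtualCores,
                              // allocVirtualCores (checked against the 7 + 1 total), totalVirtualCores
        0, 8192,              // containersAlloc (asserted to be 0), totalMB (7168 + 1024)
        2, 0, 0, 0, 0, 2, 0); // totalNodes, lostNodes, unhealthyNodes, decommissionedNodes,
                              // rebootedNodes, activeNodes, shutdownNodes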

Example 24 with QueueMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.

From class TestWorkPreservingRMRestart, method testCapacitySchedulerRecovery().

// Test CS recovery with multi-level queues and multi-users:
// 1. setup 2 NMs each with 8GB memory;
// 2. setup 2 level queues: Default -> (QueueA, QueueB);
// 3. User1 submits 2 apps on QueueA;
// 4. User2 submits 1 app on QueueB;
// 5. AM and each container has 1GB memory;
// 6. Restart RM;
// 7. nm1 re-syncs back containers belonging to user1;
// 8. nm2 re-syncs back containers belonging to user2;
// 9. Assert the parent queue and 2 leaf queues' state and metrics;
// 10. Assert each user's consumption inside the queue.
@Test(timeout = 30000)
public void testCapacitySchedulerRecovery() throws Exception {
    if (getSchedulerType() != SchedulerType.CAPACITY) {
        return;
    }
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfiguration(csConf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(csConf);
    rm1 = new MockRM(csConf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A);
    MockAM am1_1 = MockRM.launchAndRegisterAM(app1_1, rm1, nm1);
    RMApp app1_2 = rm1.submitApp(1024, "app1_2", USER_1, null, A);
    MockAM am1_2 = MockRM.launchAndRegisterAM(app1_2, rm1, nm2);
    RMApp app2 = rm1.submitApp(1024, "app2", USER_2, null, B);
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // clear queue metrics
    rm1.clearQueueMetrics(app1_1);
    rm1.clearQueueMetrics(app1_2);
    rm1.clearQueueMetrics(app2);
    csConf.set(PREFIX + "root.Default.QueueB.state", "STOPPED");
    // Re-start RM
    rm2 = new MockRM(csConf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());
    List<NMContainerStatus> am1_1Containers = createNMContainerStatusForApp(am1_1);
    List<NMContainerStatus> am1_2Containers = createNMContainerStatusForApp(am1_2);
    am1_1Containers.addAll(am1_2Containers);
    nm1.registerNode(am1_1Containers, null);
    List<NMContainerStatus> am2Containers = createNMContainerStatusForApp(am2);
    nm2.registerNode(am2Containers, null);
    // Wait for RM to settle down on recovering containers;
    waitForNumContainersToRecover(2, rm2, am1_1.getApplicationAttemptId());
    waitForNumContainersToRecover(2, rm2, am1_2.getApplicationAttemptId());
    waitForNumContainersToRecover(2, rm2, am2.getApplicationAttemptId());
    // Calculate each queue's resource usage.
    Resource containerResource = Resource.newInstance(1024, 1);
    Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
    Resource clusterResource = Resources.multiply(nmResource, 2);
    Resource q1Resource = Resources.multiply(clusterResource, 0.5);
    Resource q2Resource = Resources.multiply(clusterResource, 0.5);
    Resource q1UsedResource = Resources.multiply(containerResource, 4);
    Resource q2UsedResource = Resources.multiply(containerResource, 2);
    Resource totalUsedResource = Resources.add(q1UsedResource, q2UsedResource);
    Resource q1availableResources = Resources.subtract(q1Resource, q1UsedResource);
    Resource q2availableResources = Resources.subtract(q2Resource, q2UsedResource);
    Resource totalAvailableResource = Resources.add(q1availableResources, q2availableResources);
    Map<ApplicationId, SchedulerApplication> schedulerApps = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication schedulerApp1_1 = schedulerApps.get(app1_1.getApplicationId());
    // assert queue A state.
    checkCSLeafQueue(rm2, schedulerApp1_1, clusterResource, q1Resource, q1UsedResource, 4);
    QueueMetrics queue1Metrics = schedulerApp1_1.getQueue().getMetrics();
    assertMetrics(queue1Metrics, 2, 0, 2, 0, 4, q1availableResources.getMemorySize(), q1availableResources.getVirtualCores(), q1UsedResource.getMemorySize(), q1UsedResource.getVirtualCores());
    // assert queue B state.
    SchedulerApplication schedulerApp2 = schedulerApps.get(app2.getApplicationId());
    checkCSLeafQueue(rm2, schedulerApp2, clusterResource, q2Resource, q2UsedResource, 2);
    QueueMetrics queue2Metrics = schedulerApp2.getQueue().getMetrics();
    assertMetrics(queue2Metrics, 1, 0, 1, 0, 2, q2availableResources.getMemorySize(), q2availableResources.getVirtualCores(), q2UsedResource.getMemorySize(), q2UsedResource.getVirtualCores());
    // assert parent queue state.
    LeafQueue leafQueue = (LeafQueue) schedulerApp2.getQueue();
    ParentQueue parentQueue = (ParentQueue) leafQueue.getParent();
    checkParentQueue(parentQueue, 6, totalUsedResource, (float) 6 / 16, (float) 6 / 16);
    assertMetrics(parentQueue.getMetrics(), 3, 0, 3, 0, 6, totalAvailableResource.getMemorySize(), totalAvailableResource.getVirtualCores(), totalUsedResource.getMemorySize(), totalUsedResource.getVirtualCores());
}
Also used:
- FSParentQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSParentQueue)
- ParentQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue)
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)
- AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)
- SchedulerApplication (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication)
- DominantResourceCalculator (org.apache.hadoop.yarn.util.resource.DominantResourceCalculator)
- Resource (org.apache.hadoop.yarn.api.records.Resource)
- TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)
- LeafQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)
- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)
- MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)
- NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)
- ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)
- CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)
- Test (org.junit.Test)
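
As with verifyMetrics in Example 22, the assertMetrics helper is not shown in this snippet. A hedged sketch consistent with the call sites above (argument order assumed):

// Hypothetical sketch of the assertMetrics helper (argument order assumed).
private void assertMetrics(QueueMetrics qm, int appsSubmitted, int appsPending,
        int appsRunning, int appsCompleted, int allocatedContainers,
        long availableMB, long availableVirtualCores,
        long allocatedMB, long allocatedVirtualCores) {
    Assert.assertEquals(appsSubmitted, qm.getAppsSubmitted());
    Assert.assertEquals(appsPending, qm.getAppsPending());
    Assert.assertEquals(appsRunning, qm.getAppsRunning());
    Assert.assertEquals(appsCompleted, qm.getAppsCompleted());
    Assert.assertEquals(allocatedContainers, qm.getAllocatedContainers());
    Assert.assertEquals(availableMB, qm.getAvailableMB());
    Assert.assertEquals(availableVirtualCores, qm.getAvailableVirtualCores());
    Assert.assertEquals(allocatedMB, qm.getAllocatedMB());
    Assert.assertEquals(allocatedVirtualCores, qm.getAllocatedVirtualCores());
}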

Example 25 with QueueMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.

From class TestOpportunisticContainerAllocatorAMService, method testContainerPromoteAfterContainerStart().

@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // Both nodes are now eligible for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Verify Metrics
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    // Verify that the container is actually running from the RM's point of view.
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    // Verify Metrics After OPP allocation (Nothing should change again)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send the promotion request again; this should fail with an update error
    // because the first promotion request is still outstanding.
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
    Assert.assertEquals(uc.getId(), container.getId());
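    // A successful promotion bumps the container version by one.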
    Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
    // Verify that the container is still in the RUNNING state from the RM's point of view.
    rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics After OPP allocation :
    // Allocated cores+mem should have increased, available should decrease
    verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
Also used:
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)
- NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)
- NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)
- HashMap (java.util.HashMap)
- ArrayList (java.util.ArrayList)
- ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)
- RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)
- DistributedSchedulingAllocateResponse (org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse)
- AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)
- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)
- RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)
- Container (org.apache.hadoop.yarn.api.records.Container)
- NodeId (org.apache.hadoop.yarn.api.records.NodeId)
- ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)
- OpportunisticContainerContext (org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext)
- RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)
- CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)
- Test (org.junit.Test)
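
For contrast with the promotion flow above, a hedged sketch of the symmetric demotion request; ContainerUpdateType.DEMOTE_EXECUTION_TYPE is the counterpart update type in the YARN records API, and the request version must match the promoted container's current version:

// Hypothetical follow-up: demote the now-GUARANTEED container back to OPPORTUNISTIC.
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(
        UpdateContainerRequest.newInstance(
                uc.getVersion(),                          // current container version after promotion
                uc.getId(),
                ContainerUpdateType.DEMOTE_EXECUTION_TYPE,
                null,                                     // no resource change requested
                ExecutionType.OPPORTUNISTIC)));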

Aggregations

- QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics): 25 usages
- Test (org.junit.Test): 14 usages
- Resource (org.apache.hadoop.yarn.api.records.Resource): 10 usages
- ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 9 usages
- RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext): 7 usages
- RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode): 7 usages
- NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent): 7 usages
- NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent): 7 usages
- RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 6 usages
- Container (org.apache.hadoop.yarn.api.records.Container): 5 usages
- Before (org.junit.Before): 5 usages
- ArrayList (java.util.ArrayList): 4 usages
- ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 4 usages
- RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 4 usages
- HashMap (java.util.HashMap): 3 usages
- AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse): 3 usages
- NodeId (org.apache.hadoop.yarn.api.records.NodeId): 3 usages
- DistributedSchedulingAllocateResponse (org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse): 3 usages
- InMemoryPlan (org.apache.hadoop.yarn.server.resourcemanager.reservation.InMemoryPlan): 3 usages
- ReservationSchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSchedulerConfiguration): 3 usages