
Example 1 with SchedulerApplication

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in the Apache Hadoop project.

From the class TestWorkPreservingRMRestart, method testDynamicQueueRecovery:

// Test work-preserving recovery of apps running under reservation.
// This involves:
// 1. Setting up a dynamic reservable queue,
// 2. Submitting an app to it,
// 3. Failing over the RM,
// 4. Validating that the app is recovered post failover,
// 5. Checking that all running containers are recovered,
// 6. Verifying the scheduler state, such as attempt info,
// 7. Verifying the queue/user metrics for the dynamic reservable queue.
@Test(timeout = 30000)
public void testDynamicQueueRecovery() throws Exception {
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    // 1. Set up dynamic reservable queue.
    Configuration schedulerConf = getSchedulerDynamicConfiguration();
    int containerMemory = 1024;
    Resource containerResource = Resource.newInstance(containerMemory, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(schedulerConf);
    rm1 = new MockRM(schedulerConf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    // 2. Run the plan follower to pick up the added node, then submit an app
    // to the dynamic queue.
    rm1.getRMContext().getReservationSystem().synchronizePlan(ReservationSystemTestUtil.reservationQ, true);
    RMApp app1 = rm1.submitApp(200, "dynamicQApp", UserGroupInformation.getCurrentUser().getShortUserName(), null, ReservationSystemTestUtil.getReservationQueueName());
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // clear queue metrics
    rm1.clearQueueMetrics(app1);
    // 3. Fail over (restart) RM.
    rm2 = new MockRM(schedulerConf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // 4. Validate app is recovered post failover.
    RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
    NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
    // Wait for RM to settle down on recovering containers.
    waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
    assertTrue(launchedContainers.contains(amContainer.getContainerId()));
    assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
    // 5. Check that RMContainers are re-created and the container state is
    // correct.
    rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
    SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
    // ********* check scheduler node state.*******
    // 2 running containers.
    Resource usedResources = Resources.multiply(containerResource, 2);
    Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
    assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
    assertTrue(schedulerNode1.isValidContainer(runningContainer.getContainerId()));
    assertFalse(schedulerNode1.isValidContainer(completedContainer.getContainerId()));
    // 2 launched containers, 1 completed container
    assertEquals(2, schedulerNode1.getNumContainers());
    assertEquals(Resources.subtract(nmResource, usedResources), schedulerNode1.getUnallocatedResource());
    assertEquals(usedResources, schedulerNode1.getAllocatedResource());
    Resource availableResources = Resources.subtract(nmResource, usedResources);
    // 6. Verify the scheduler state like attempt info.
    Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>> sa = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication<SchedulerApplicationAttempt> schedulerApp = sa.get(recoveredApp1.getApplicationId());
    // 7. Verify the queue/user metrics for the dynamic reservable queue.
    if (getSchedulerType() == SchedulerType.CAPACITY) {
        checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
    } else {
        checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
    }
    // *********** check scheduler attempt state.********
    SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(amContainer.getContainerId())));
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(runningContainer.getContainerId())));
    assertEquals(schedulerAttempt.getCurrentConsumption(), usedResources);
    // *********** check appSchedulingInfo state ***********
    assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) FairSchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) Resource(org.apache.hadoop.yarn.api.records.Resource) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)
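Note: the checkCSQueue and checkFSQueue helpers invoked in step 7 are defined elsewhere in TestWorkPreservingRMRestart and are not shown on this page. The sketch below is purely illustrative, not the actual helper: the method name is invented, it assumes an import of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics, and it only demonstrates the kind of queue-metric assertions that become possible once the SchedulerApplication has been recovered.

// Hypothetical sketch, NOT the real checkCSQueue/checkFSQueue: verify that the
// recovered queue's metrics reflect the two live containers.
private void verifyQueueMetricsSketch(
        SchedulerApplication<SchedulerApplicationAttempt> schedulerApp,
        Resource used, Resource available) {
    QueueMetrics metrics = schedulerApp.getQueue().getMetrics();
    // Memory and vcores currently allocated on the queue match the two containers.
    assertEquals(used.getMemorySize(), metrics.getAllocatedMB());
    assertEquals(used.getVirtualCores(), metrics.getAllocatedVirtualCores());
    assertEquals(2, metrics.getAllocatedContainers());
    // Whatever the NM registered minus the allocation should still be available.
    assertEquals(available.getMemorySize(), metrics.getAvailableMB());
}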

Example 2 with SchedulerApplication

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in the Apache Hadoop project.

From the class CapacityScheduler, method addApplicationOnRecovery:

private void addApplicationOnRecovery(ApplicationId applicationId, String queueName, String user, Priority priority) {
    try {
        writeLock.lock();
        CSQueue queue = getQueue(queueName);
        if (queue == null) {
            // During restart, this indicates that the queue was removed, which is
            // not presently supported.
            if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
                this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it was submitted to queue " + queueName + " which no longer exists after restart."));
                return;
            } else {
                String queueErrorMsg = "Queue named " + queueName + " missing during application recovery." + " Queue removal during recovery is not presently " + "supported by the capacity scheduler, please " + "restart with all queues configured" + " which were present before shutdown/restart.";
                LOG.fatal(queueErrorMsg);
                throw new QueueInvalidException(queueErrorMsg);
            }
        }
        if (!(queue instanceof LeafQueue)) {
            // During RM restart, this means the leaf queue was converted to a parent
            // queue, which is not supported for running apps.
            if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
                this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it was submitted to queue " + queueName + " which is no longer a leaf queue after restart."));
                return;
            } else {
                String queueErrorMsg = "Queue named " + queueName + " is no longer a leaf queue during application recovery." + " Changing a leaf queue to a parent queue during recovery is" + " not presently supported by the capacity scheduler. Please" + " restart with leaf queues before shutdown/restart continuing" + " as leaf queues.";
                LOG.fatal(queueErrorMsg);
                throw new QueueInvalidException(queueErrorMsg);
            }
        }
        // Submit to the queue
        try {
            queue.submitApplication(applicationId, user, queueName);
        } catch (AccessControlException ace) {
        // Ignore the exception for recovered app as the app was previously
        // accepted.
        }
        queue.getMetrics().submitApp(user);
        SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>(queue, user, priority);
        applications.put(applicationId, application);
        LOG.info("Accepted application " + applicationId + " from user: " + user + ", in queue: " + queueName);
        if (LOG.isDebugEnabled()) {
            LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : RMAppEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) AccessControlException(org.apache.hadoop.security.AccessControlException) QueueInvalidException(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueInvalidException)
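Once addApplicationOnRecovery has stored the entry, the rest of the scheduler retrieves it from the same applications map (in AbstractYarnScheduler this is a ConcurrentMap keyed by ApplicationId). The helper below is a minimal illustrative sketch under that assumption; its name is hypothetical and it is not part of CapacityScheduler.

// Illustrative sketch only: look up the entry stored by addApplicationOnRecovery
// and return its current attempt, or null if the application is unknown.
private FiCaSchedulerApp getCurrentAttemptSketch(ApplicationId applicationId) {
    SchedulerApplication<FiCaSchedulerApp> application =
            applications.get(applicationId);
    return application == null ? null : application.getCurrentAppAttempt();
}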

Example 3 with SchedulerApplication

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in the Apache Hadoop project.

From the class TestWorkPreservingRMRestart, method testUAMRecoveryOnRMWorkPreservingRestart:

@Test(timeout = 600000)
public void testUAMRecoveryOnRMWorkPreservingRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the UAM
    RMApp app0 = rm1.submitApp(200, true);
    MockAM am0 = MockRM.launchUAM(app0, rm1, nm1);
    am0.registerAppAttempt();
    // Allocate containers to UAM
    int numContainers = 2;
    am0.allocate("127.0.0.1", 1000, numContainers, new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    List<Container> conts = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    Assert.assertTrue(conts.isEmpty());
    while (conts.size() == 0) {
        nm1.nodeHeartbeat(true);
        conts.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    Assert.assertFalse(conts.isEmpty());
    // start new RM
    rm2 = new MockRM(conf, memStore);
    rm2.start();
    rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);
    // recover app
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    RMApp recoveredApp = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
    NMContainerStatus container1 = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus container2 = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    nm1.registerNode(Arrays.asList(container1, container2), null);
    // Wait for RM to settle down on recovering containers.
    waitForNumContainersToRecover(2, rm2, am0.getApplicationAttemptId());
    // retry registerApplicationMaster() after RM restart.
    am0.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    am0.registerAppAttempt(true);
    // Check if UAM is correctly recovered on restart
    rm2.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
    // Check if containers allocated to UAM are recovered
    Map<ApplicationId, SchedulerApplication> schedulerApps = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication schedulerApp = schedulerApps.get(recoveredApp.getApplicationId());
    SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
    Assert.assertEquals(numContainers, schedulerAttempt.getLiveContainers().size());
    // Check if UAM is able to heart beat
    Assert.assertNotNull(am0.doHeartbeat());
    // Complete the UAM
    am0.unregisterAppAttempt(false);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
    rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
    Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, recoveredApp.getFinalApplicationStatus());
    // Restart RM once more to check UAM is not re-run
    MockRM rm3 = new MockRM(conf, memStore);
    rm3.start();
    recoveredApp = rm3.getRMContext().getRMApps().get(app0.getApplicationId());
    Assert.assertEquals(RMAppState.FINISHED, recoveredApp.getState());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) ArrayList(java.util.ArrayList) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)
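The allocate-and-heartbeat polling loop above is a recurring pattern in these restart tests. A small extraction into a reusable helper might look like the sketch below; the helper name is hypothetical, and the 500 ms interval simply mirrors the loop in the test.

// Hypothetical helper sketch: heartbeat the NM until the (U)AM has been
// allocated at least 'count' containers, then return them.
private static List<Container> waitForAllocationSketch(MockAM am, MockNM nm,
        int count) throws Exception {
    List<Container> allocated = new ArrayList<>();
    while (allocated.size() < count) {
        nm.nodeHeartbeat(true);
        allocated.addAll(am.allocate(new ArrayList<ResourceRequest>(),
                new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    return allocated;
}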

Example 4 with SchedulerApplication

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in the Apache Hadoop project.

From the class TestWorkPreservingRMRestart, method testSchedulerRecovery:

// Test that common scheduler state, including SchedulerAttempt, SchedulerNode,
// and AppSchedulingInfo, can be reconstructed from the container recovery
// reports sent on NM re-registration.
// Also test scheduler-specific behavior, i.e. queue recovery:
// CSQueue/FSQueue/FifoQueue recovery respectively.
// Test strategy: send 3 container recovery reports (AM container, running
// container, completed container) on NM re-registration, then check that the
// states of SchedulerAttempt, SchedulerNode, etc. are updated accordingly.
@Test(timeout = 20000)
public void testSchedulerRecovery() throws Exception {
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    int containerMemory = 1024;
    Resource containerResource = Resource.newInstance(containerMemory, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app1 = rm1.submitApp(200);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    // clear queue metrics
    rm1.clearQueueMetrics(app1);
    // Re-start RM
    rm2 = new MockRM(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // recover app
    RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
    NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
    // Wait for RM to settle down on recovering containers.
    waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
    Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
    assertTrue(launchedContainers.contains(amContainer.getContainerId()));
    assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
    // check that RMContainers are re-created and the container state is correct.
    rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
    rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
    SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
    assertTrue("SchedulerNode#toString is not in expected format", schedulerNode1.toString().contains(schedulerNode1.getUnallocatedResource().toString()));
    assertTrue("SchedulerNode#toString is not in expected format", schedulerNode1.toString().contains(schedulerNode1.getAllocatedResource().toString()));
    // ********* check scheduler node state.*******
    // 2 running containers.
    Resource usedResources = Resources.multiply(containerResource, 2);
    Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
    assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
    assertTrue(schedulerNode1.isValidContainer(runningContainer.getContainerId()));
    assertFalse(schedulerNode1.isValidContainer(completedContainer.getContainerId()));
    // 2 launched containers, 1 completed container
    assertEquals(2, schedulerNode1.getNumContainers());
    assertEquals(Resources.subtract(nmResource, usedResources), schedulerNode1.getUnallocatedResource());
    assertEquals(usedResources, schedulerNode1.getAllocatedResource());
    Resource availableResources = Resources.subtract(nmResource, usedResources);
    // ***** check queue state based on the underlying scheduler ********
    Map<ApplicationId, SchedulerApplication> schedulerApps = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
    SchedulerApplication schedulerApp = schedulerApps.get(recoveredApp1.getApplicationId());
    if (getSchedulerType() == SchedulerType.CAPACITY) {
        checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
    } else {
        checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
    }
    // *********** check scheduler attempt state.********
    SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(amContainer.getContainerId())));
    assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(runningContainer.getContainerId())));
    assertEquals(schedulerAttempt.getCurrentConsumption(), usedResources);
    // *********** check appSchedulingInfo state ***********
    assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) Resource(org.apache.hadoop.yarn.api.records.Resource) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)
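The final assertion compares schedulerAttempt.getNewContainerId() with (1L << 40) + 1L. As I understand ContainerId's long encoding, the RM restart epoch lives in the bits above the 40-bit container sequence number, so after one restart the next id handed out by the recovered attempt is epoch 1, sequence 1. A small sketch of that decomposition, with the 40-bit width treated as an assumption here:

// Sketch: split the expected new container id into epoch and sequence,
// assuming a 40-bit container-id bitmask.
long expectedNewId = (1L << 40) + 1L;
long epoch = expectedNewId >>> 40;                 // 1 -> one RM restart so far
long sequence = expectedNewId & ((1L << 40) - 1);  // 1 -> first id issued after recovery
System.out.println("epoch=" + epoch + ", sequence=" + sequence);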

Example 5 with SchedulerApplication

Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication in the Apache Hadoop project.

From the class SLSCapacityScheduler, method handle:

@Override
public void handle(SchedulerEvent schedulerEvent) {
    // metrics off
    if (!metricsON) {
        super.handle(schedulerEvent);
        return;
    }
    if (!running) {
        running = true;
    }
    // metrics on
    Timer.Context handlerTimer = null;
    Timer.Context operationTimer = null;
    NodeUpdateSchedulerEventWrapper eventWrapper;
    try {
        //if (schedulerEvent instanceof NodeUpdateSchedulerEvent) {
        if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
            eventWrapper = new NodeUpdateSchedulerEventWrapper((NodeUpdateSchedulerEvent) schedulerEvent);
            schedulerEvent = eventWrapper;
            updateQueueWithNodeUpdate(eventWrapper);
        } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
            // If the removed attempt still has an AM container, update the queue's
            // resource usage information.
            AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
            ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
            String queue = appQueueMap.get(appAttemptId);
            SchedulerAppReport app = super.getSchedulerAppInfo(appAttemptId);
            if (!app.getLiveContainers().isEmpty()) {
                // The attempt has at most one live container left at this point,
                // and if one remains it should be the AM container.
                RMContainer rmc = app.getLiveContainers().iterator().next();
                updateQueueMetrics(queue, rmc.getContainer().getResource().getMemorySize(), rmc.getContainer().getResource().getVirtualCores());
            }
        }
        handlerTimer = schedulerHandleTimer.time();
        operationTimer = schedulerHandleTimerMap.get(schedulerEvent.getType()).time();
        super.handle(schedulerEvent);
    } finally {
        if (handlerTimer != null) {
            handlerTimer.stop();
        }
        if (operationTimer != null) {
            operationTimer.stop();
        }
        schedulerHandleCounter.inc();
        schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
        if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
            SLSRunner.decreaseRemainingApps();
            AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
            ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
            appQueueMap.remove(appAttemptId);
        } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
            AppAttemptAddedSchedulerEvent appAddEvent = (AppAttemptAddedSchedulerEvent) schedulerEvent;
            SchedulerApplication app = applications.get(appAddEvent.getApplicationAttemptId().getApplicationId());
            appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue().getQueueName());
        }
    }
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) Timer(com.codahale.metrics.Timer) SchedulerApplication(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AppAttemptAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulerAppReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport)
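The handle() override above references several fields that the snippet never declares: appQueueMap, metricsON, running, and the Codahale metrics objects. Their real declarations in SLSCapacityScheduler are not shown on this page; the sketch below only infers plausible types from the usage above (Map, ConcurrentHashMap, and com.codahale.metrics.Counter are assumptions, not copied code).

// Inferred field declarations (sketch only; types deduced from the usage above):
private final Map<ApplicationAttemptId, String> appQueueMap =
        new ConcurrentHashMap<>();
private boolean metricsON;                                          // toggles metric collection
private volatile boolean running;
private Timer schedulerHandleTimer;                                 // overall handle() latency
private Map<SchedulerEventType, Timer> schedulerHandleTimerMap;     // per-event-type latency
private Counter schedulerHandleCounter;                             // total events handled
private Map<SchedulerEventType, Counter> schedulerHandleCounterMap; // per-event-type counts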

Aggregations

SchedulerApplication (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication): 10 usages
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 5 usages
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 5 usages
NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus): 4 usages
TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM): 4 usages
MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore): 4 usages
RMAppEvent (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent): 4 usages
AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler): 4 usages
Test (org.junit.Test): 4 usages
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 3 usages
Resource (org.apache.hadoop.yarn.api.records.Resource): 3 usages
SchedulerApplicationAttempt (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt): 3 usages
DominantResourceCalculator (org.apache.hadoop.yarn.util.resource.DominantResourceCalculator): 3 usages
AccessControlException (org.apache.hadoop.security.AccessControlException): 2 usages
RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt): 2 usages
RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl): 2 usages
SchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode): 2 usages
CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration): 2 usages
FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 2 usages
Timer (com.codahale.metrics.Timer): 1 usage