Search in sources :

Example 6 with SchedulerApplicationAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.

the class TestAMRestart method testAMRestartWithExistingContainers.

/**
 * Exercises an AM restart while the first attempt still owns containers in
 * several different states (running, acquired, allocated, reserved).
 *
 * The test fails the first AM, launches a second attempt, and then checks
 * that the two running containers are reported to the new attempt at
 * registration, and that the finished/killed containers show up in the new
 * attempt's just-finished container list.
 *
 * RM_AM_MAX_ATTEMPTS is set to 2 so that a second attempt is permitted.
 */
@Test(timeout = 30000)
public void testAMRestartWithExistingContainers() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    MockRM rm1 = new MockRM(conf);
    rm1.start();
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
    // Two mock node managers are registered; only nm1 is driven by heartbeats
    // in the rest of this test.
    MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
    nm1.registerNode();
    MockNM nm2 = new MockNM("127.0.0.1:2351", 4089, rm1.getResourceTrackerService());
    nm2.registerNode();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    int NUM_CONTAINERS = 3;
    // presumably this leaves containers 2..4 acquired by am1 (helper is
    // defined elsewhere in the test class) -- TODO confirm
    allocateContainers(nm1, am1, NUM_CONTAINERS);
    // launch the 2nd container, for testing running container transferred.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // launch the 3rd container, for testing container allocated by previous
    // attempt is completed by the next new attempt.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.RUNNING);
    ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
    rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
    // 4th container still in ACQUIRED state. for testing Acquired container is
    // always killed.
    ContainerId containerId4 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
    rm1.waitForState(nm1, containerId4, RMContainerState.ACQUIRED);
    // 5th container is in Allocated state. for testing allocated container is
    // always killed.
    am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    ContainerId containerId5 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 5);
    rm1.waitForState(nm1, containerId5, RMContainerState.ALLOCATED);
    // 6th container is in Reserved state.
    // NOTE(review): the 6000 request presumably exceeds what is left on nm1
    // so the scheduler reserves instead of allocating -- confirm.
    am1.allocate("127.0.0.1", 6000, 1, new ArrayList<ContainerId>());
    ContainerId containerId6 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 6);
    nm1.nodeHeartbeat(true);
    SchedulerApplicationAttempt schedulerAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId6);
    // Keep heartbeating until the scheduler attempt reports at least one
    // reserved container. There is no loop-local bound; the @Test timeout is
    // the only thing that ends this if a reservation never happens.
    while (schedulerAttempt.getReservedContainers().isEmpty()) {
        System.out.println("Waiting for container " + containerId6 + " to be reserved.");
        nm1.nodeHeartbeat(true);
        Thread.sleep(200);
    }
    // assert containerId6 is reserved.
    Assert.assertEquals(containerId6, schedulerAttempt.getReservedContainers().get(0).getContainerId());
    // fail the AM by sending CONTAINER_FINISHED event without registering.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // wait for some time. previous AM's running containers should still remain
    // in scheduler even though am failed
    Thread.sleep(3000);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // acquired/allocated containers are cleaned up.
    Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId4));
    Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId5));
    // wait for app to start a new attempt.
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    // assert this is a new AM.
    ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
    // launch the new AM
    // NOTE(review): launchAM is invoked through the rm1 instance while also
    // taking rm1 as an argument; if it is a static method, calling it as
    // MockRM.launchAM(...) would match the launchAndRegisterAM call above.
    MockAM am2 = rm1.launchAM(app1, rm1, nm1);
    RegisterApplicationMasterResponse registerResponse = am2.registerAppAttempt();
    // Assert two containers are running: container2 and container3;
    Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempts().size());
    boolean containerId2Exists = false, containerId3Exists = false;
    for (Container container : registerResponse.getContainersFromPreviousAttempts()) {
        if (container.getId().equals(containerId2)) {
            containerId2Exists = true;
        }
        if (container.getId().equals(containerId3)) {
            containerId3Exists = true;
        }
    }
    Assert.assertTrue(containerId2Exists && containerId3Exists);
    rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
    // complete container by sending the container complete event which has earlier
    // attempt's attemptId
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
    // Even though the completed container containerId3 event was sent to the
    // earlier failed attempt, new RMAppAttempt can also capture this container
    // info.
    // completed containerId4 is also transferred to the new attempt.
    RMAppAttempt newAttempt = app1.getRMAppAttempt(am2.getApplicationAttemptId());
    // 4 containers finished, acquired/allocated/reserved/completed.
    waitForContainersToFinish(4, newAttempt);
    boolean container3Exists = false, container4Exists = false, container5Exists = false, container6Exists = false;
    for (ContainerStatus status : newAttempt.getJustFinishedContainers()) {
        if (status.getContainerId().equals(containerId3)) {
            // containerId3 is the container ran by previous attempt but finished by the
            // new attempt.
            container3Exists = true;
        }
        if (status.getContainerId().equals(containerId4)) {
            // containerId4 is the Acquired Container killed by the previous attempt,
            // it's now inside new attempt's finished container list.
            container4Exists = true;
        }
        if (status.getContainerId().equals(containerId5)) {
            // containerId5 is the Allocated container killed by previous failed attempt.
            container5Exists = true;
        }
        if (status.getContainerId().equals(containerId6)) {
            // containerId6 is the reserved container killed by previous failed attempt.
            container6Exists = true;
        }
    }
    Assert.assertTrue(container3Exists && container4Exists && container5Exists && container6Exists);
    // New SchedulerApplicationAttempt also has the containers info.
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // record the scheduler attempt for testing.
    SchedulerApplicationAttempt schedulerNewAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId2);
    // finish this application
    MockRM.finishAMAndVerifyAppState(app1, rm1, nm1, am2);
    // the 2nd attempt released the 1st attempt's running container, when the
    // 2nd attempt finishes.
    // NOTE(review): if getLiveContainers() returns RMContainer objects rather
    // than ContainerIds, contains(containerId2) can never be true and this
    // assertion passes vacuously -- confirm the element type.
    Assert.assertFalse(schedulerNewAttempt.getLiveContainers().contains(containerId2));
    // all 4 normal containers finished.
    // NOTE(review): the comment above says 4 but the call below waits for 5
    // finished containers -- confirm which count is intended.
    System.out.println("New attempt's just finished containers: " + newAttempt.getJustFinishedContainers());
    waitForContainersToFinish(5, newAttempt);
    // NOTE(review): rm1.stop() is skipped when any assertion above throws.
    rm1.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Container(org.apache.hadoop.yarn.api.records.Container) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)

Example 7 with SchedulerApplicationAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.

the class TestCapacityScheduler method testAddAndRemoveAppFromCapacityScheduler.

/**
 * Builds a MockRM configured to use the CapacityScheduler, hands its
 * scheduler to TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler
 * with queue "a1", and checks that the returned application reports "a1"
 * as its queue name.
 */
@Test
public void testAddAndRemoveAppFromCapacityScheduler() throws Exception {
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
    setupQueueConfiguration(csConf);
    // Select CapacityScheduler as the ResourceScheduler implementation.
    csConf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    MockRM mockRm = new MockRM(csConf);

    // The scheduler is obtained through the generic ResourceScheduler
    // accessor, hence the unchecked cast to the parameterized type.
    @SuppressWarnings("unchecked")
    AbstractYarnScheduler<SchedulerApplicationAttempt, SchedulerNode> scheduler =
        (AbstractYarnScheduler<SchedulerApplicationAttempt, SchedulerNode>) mockRm.getResourceScheduler();

    SchedulerApplication<SchedulerApplicationAttempt> schedulerApp =
        TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
            scheduler.getSchedulerApplications(), scheduler, "a1");

    String queueName = schedulerApp.getQueue().getQueueName();
    Assert.assertEquals("a1", queueName);
}
Also used : AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)

Example 8 with SchedulerApplicationAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.

the class TestWorkPreservingRMRestart method waitForNumContainersToRecover.

/**
 * Blocks until the scheduler knows about the given application attempt and
 * that attempt reports at least {@code num} live containers.
 *
 * Polls every 200 ms and prints a progress line on each iteration. There is
 * no internal deadline: callers rely on an outer test timeout to bound the
 * wait.
 *
 * @param num       minimum number of live containers to wait for
 * @param rm        resource manager whose scheduler is queried
 * @param attemptId application attempt to look up in the scheduler
 * @throws Exception if the sleeping thread is interrupted or a scheduler
 *                   call fails
 */
// The scheduler is deliberately used as a raw type here; suppress the
// resulting lint warning, matching the sibling helper of the same name.
@SuppressWarnings("rawtypes")
public static void waitForNumContainersToRecover(int num, MockRM rm, ApplicationAttemptId attemptId) throws Exception {
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm.getResourceScheduler();
    SchedulerApplicationAttempt attempt = scheduler.getApplicationAttempt(attemptId);
    // Phase 1: wait for the scheduler-side attempt object to exist.
    while (attempt == null) {
        System.out.println("Wait for scheduler attempt " + attemptId + " to be created");
        Thread.sleep(200);
        attempt = scheduler.getApplicationAttempt(attemptId);
    }
    // Phase 2: wait for the attempt's live container count to reach num.
    while (attempt.getLiveContainers().size() < num) {
        System.out.println("Wait for " + num + " containers to recover. currently: " + attempt.getLiveContainers().size());
        Thread.sleep(200);
    }
}
Also used : AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt)

Example 9 with SchedulerApplicationAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.

the class TestWorkPreservingRMRestartForNodeLabel method waitForNumContainersToRecover.

/**
 * Polls the resource manager's scheduler until the given attempt exists and
 * reports at least {@code num} live containers, sleeping 200 ms between
 * checks and printing a progress message each time. The wait has no
 * built-in deadline.
 *
 * @param num       number of live containers to wait for
 * @param rm        resource manager whose scheduler is polled
 * @param attemptId attempt whose containers are being counted
 * @throws Exception if a sleep is interrupted or a scheduler call fails
 */
@SuppressWarnings("rawtypes")
public static void waitForNumContainersToRecover(int num, MockRM rm, ApplicationAttemptId attemptId) throws Exception {
    final AbstractYarnScheduler yarnScheduler = (AbstractYarnScheduler) rm.getResourceScheduler();

    // Re-query the scheduler until it returns a non-null attempt.
    SchedulerApplicationAttempt schedulerAttempt;
    while ((schedulerAttempt = yarnScheduler.getApplicationAttempt(attemptId)) == null) {
        System.out.println("Wait for scheduler attempt " + attemptId + " to be created");
        Thread.sleep(200);
    }

    // The live container collection is re-read on every pass.
    while (schedulerAttempt.getLiveContainers().size() < num) {
        int liveNow = schedulerAttempt.getLiveContainers().size();
        System.out.println("Wait for " + num + " containers to recover. currently: " + liveNow);
        Thread.sleep(200);
    }
}
Also used : AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt)

Example 10 with SchedulerApplicationAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt in project hadoop by apache.

the class OpportunisticContainerAllocatorAMService method allocateInternal.

/**
 * Handles an allocate call by first serving the OPPORTUNISTIC part of the
 * ask list locally and then delegating the GUARANTEED part to the
 * superclass implementation.
 *
 * The request's ask list is overwritten with the GUARANTEED asks before the
 * superclass is invoked, so the caller's request object is mutated.
 *
 * @param appAttemptId     attempt on whose behalf containers are requested
 * @param request          incoming allocate request; its ask list is replaced
 * @param allocateResponse response that receives any opportunistic containers
 * @throws YarnException propagated from the allocator or the superclass
 */
@Override
protected void allocateInternal(ApplicationAttemptId appAttemptId, AllocateRequest request, AllocateResponse allocateResponse) throws YarnException {
    // Split the asks into their GUARANTEED and OPPORTUNISTIC parts.
    OpportunisticContainerAllocator.PartitionedResourceRequests splitAsks =
        oppContainerAllocator.partitionAskList(request.getAskList());

    // Look up the scheduler-side attempt and refresh its candidate node list
    // before allocating OPPORTUNISTIC containers.
    AbstractYarnScheduler yarnScheduler = (AbstractYarnScheduler) rmContext.getScheduler();
    SchedulerApplicationAttempt schedulerAttempt = yarnScheduler.getApplicationAttempt(appAttemptId);
    OpportunisticContainerContext allocationContext = schedulerAttempt.getOpportunisticContainerContext();
    allocationContext.updateNodeList(getLeastLoadedNodes());

    List<Container> allocated = oppContainerAllocator.allocateContainers(
        request.getResourceBlacklistRequest(),
        splitAsks.getOpportunistic(),
        appAttemptId,
        allocationContext,
        ResourceManager.getClusterTimeStamp(),
        schedulerAttempt.getUser());

    // Only when something was allocated: create the RMContainers, update the
    // NM tokens and add the containers to the response.
    boolean anyAllocated = !allocated.isEmpty();
    if (anyAllocated) {
        handleNewContainers(allocated, false);
        schedulerAttempt.updateNMTokens(allocated);
        addToAllocatedContainers(allocateResponse, allocated);
    }

    // Hand the remaining GUARANTEED asks to the regular allocation path.
    request.setAskList(splitAsks.getGuaranteed());
    super.allocateInternal(appAttemptId, request, allocateResponse);
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) OpportunisticContainerAllocator(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator)

Aggregations

SchedulerApplicationAttempt (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt)12 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)11 Test (org.junit.Test)6 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)4 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)4 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)4 SchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode)4 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 Container (org.apache.hadoop.yarn.api.records.Container)3 Resource (org.apache.hadoop.yarn.api.records.Resource)3 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)3 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)3 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)3 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)3 SchedulerApplication (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication)3 Configuration (org.apache.hadoop.conf.Configuration)2 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)2 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)2