Search in sources :

Example 16 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestRMWebServices method checkSchedulerLogFileAndCleanup.

/**
 * Asserts that the debug log file matching the active scheduler type exists
 * under the directory named by the "yarn.log.dir" system property, then
 * deletes it via {@code FileUtils.deleteQuietly}.
 */
private void checkSchedulerLogFileAndCleanup() {
    final ResourceScheduler activeScheduler = rm.getResourceScheduler();
    // FairScheduler is tested first, then CapacityScheduler; any other
    // scheduler falls back to the generic log file name.
    final String logFileName = (activeScheduler instanceof FairScheduler)
        ? "yarn-fair-scheduler-debug.log"
        : (activeScheduler instanceof CapacityScheduler)
            ? "yarn-capacity-scheduler-debug.log"
            : "yarn-scheduler-debug.log";
    final String logDir = System.getProperty("yarn.log.dir");
    final File schedulerLog = new File(logDir, logFileName);
    assertTrue("scheduler log file doesn't exist", schedulerLog.exists());
    FileUtils.deleteQuietly(schedulerLog);
}
Also used : FairScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) File(java.io.File) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)

Example 17 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestRMWebApp method mockCapacityScheduler.

/**
 * Creates and initializes a CapacityScheduler backed by a minimal RMContext.
 *
 * <p>The queue layout comes from {@code setupQueueConfiguration}; the context
 * only carries the three token secret managers and a
 * {@code NullRMNodeLabelsManager}, every other constructor argument is null.
 *
 * @return the scheduler after {@code init} has been called on it
 * @throws IOException declared by the original signature
 */
public static CapacityScheduler mockCapacityScheduler() throws IOException {
    // stolen from TestCapacityScheduler
    CapacitySchedulerConfiguration queueConf = new CapacitySchedulerConfiguration();
    setupQueueConfiguration(queueConf);

    CapacityScheduler capacityScheduler = new CapacityScheduler();
    capacityScheduler.setConf(new YarnConfiguration());

    // Secret managers are created in the same order the original passed them
    // as constructor arguments.
    RMContainerTokenSecretManager containerTokens = new RMContainerTokenSecretManager(queueConf);
    NMTokenSecretManagerInRM nmTokens = new NMTokenSecretManagerInRM(queueConf);
    ClientToAMTokenSecretManagerInRM clientToAmTokens = new ClientToAMTokenSecretManagerInRM();
    RMContext context = new RMContextImpl(null, null, null, null, null, null, containerTokens, nmTokens, clientToAmTokens, null);
    context.setNodeLabelManager(new NullRMNodeLabelsManager());

    capacityScheduler.setRMContext(context);
    capacityScheduler.init(queueConf);
    return capacityScheduler;
}
Also used : RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) ClientToAMTokenSecretManagerInRM(org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) RMContainerTokenSecretManager(org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager) NMTokenSecretManagerInRM(org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM) NullRMNodeLabelsManager(org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) RMContextImpl(org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl)

Example 18 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestAMRestart method testPreemptedAMRestartOnRMRestart.

// Test RM restarts after AM container is preempted: the new RM should not
// count the AM preemption failure towards the max-retry count and should be
// able to re-launch the AM.
//
// Flow: start rm1 with a CapacityScheduler and an in-memory state store,
// launch an AM, mark its container killable, check what the state store
// recorded, then bring up rm2 on the same store and verify a second attempt
// is launched and finishes.
@Test(timeout = 20000)
public void testPreemptedAMRestartOnRMRestart() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    // Recovery is on, work-preserving recovery is explicitly off.
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    // explicitly set max-am-retry count as 1.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    // The same store instance is handed to both rm1 and rm2 below.
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app1 = rm1.submitApp(200);
    // Keep a handle on the first attempt so it can be inspected after it fails.
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    CapacityScheduler scheduler = (CapacityScheduler) rm1.getResourceScheduler();
    // Container id 1 of the attempt is used as the AM container here.
    ContainerId amContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
    // Forcibly preempt the am container;
    scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // The preempted attempt must not count towards the max-attempt limit.
    Assert.assertTrue(!attempt1.shouldCountTowardsMaxAttemptRetry());
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    // state store has 1 attempt stored.
    ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
    Assert.assertEquals(1, appState.getAttemptCount());
    // attempt stored has the preempted container exit status.
    Assert.assertEquals(ContainerExitStatus.PREEMPTED, appState.getAttempt(am1.getApplicationAttemptId()).getAMContainerExitStatus());
    // Restart rm.
    MockRM rm2 = new MockRM(conf, memStore);
    // Point the existing node manager at rm2's tracker service.
    // NOTE(review): the node registers before rm2.start() is called — confirm
    // this ordering is intended.
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm1.registerNode();
    rm2.start();
    // Restarted RM should re-launch the am.
    MockAM am2 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
    MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am2);
    // Look the app up through rm2's context to get its current attempt.
    RMAppAttempt attempt2 = rm2.getRMContext().getRMApps().get(app1.getApplicationId()).getCurrentAppAttempt();
    Assert.assertTrue(attempt2.shouldCountTowardsMaxAttemptRetry());
    // appState is the reference fetched from memStore before the restart.
    Assert.assertEquals(ContainerExitStatus.INVALID, appState.getAttempt(am2.getApplicationAttemptId()).getAMContainerExitStatus());
    rm1.stop();
    rm2.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 19 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestAMRestart method testShouldNotCountFailureToMaxAttemptRetry.

// AM container preempted, nm disk failure
// should not be counted towards AM max retry count.
//
// The test drives five AM attempts in sequence on one RM:
//   attempts 1 and 2 - AM container marked killable (preempted)
//   attempt 3        - container reported complete with DISKS_FAILED
//   attempt 4        - nm1 heartbeats as unhealthy (asserted as ABORTED)
//   attempt 5        - AM container reported complete on nm2
// Only the fifth is asserted to count towards the limit, after which the
// application is expected to reach FAILED.
@Test(timeout = 100000)
public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    // explicitly set max-am-retry count as 1.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
    nm1.registerNode();
    RMApp app1 = rm1.submitApp(200);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    CapacityScheduler scheduler = (CapacityScheduler) rm1.getResourceScheduler();
    // Container id 1 of the attempt is used as the AM container here.
    ContainerId amContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
    // Preempt the first attempt;
    scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // Wait for the scheduler-side attempt to stop before checking the attempt.
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am1.getApplicationAttemptId());
    Assert.assertTrue(!attempt1.shouldCountTowardsMaxAttemptRetry());
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    // Fetched once here and reused for the exit-status checks of later attempts.
    ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
    // AM should be restarted even though max-am-attempt is 1.
    MockAM am2 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
    RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
    // Preempt the second attempt.
    ContainerId amContainer2 = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
    scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer2));
    rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am2.getApplicationAttemptId());
    Assert.assertTrue(!attempt2.shouldCountTowardsMaxAttemptRetry());
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    MockAM am3 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
    RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
    // mimic NM disk_failure
    // Build a COMPLETE status for the third attempt's master container with
    // exit status DISKS_FAILED and deliver it through nm1's heartbeat.
    ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class);
    containerStatus.setContainerId(attempt3.getMasterContainer().getId());
    containerStatus.setDiagnostics("mimic NM disk_failure");
    containerStatus.setState(ContainerState.COMPLETE);
    containerStatus.setExitStatus(ContainerExitStatus.DISKS_FAILED);
    Map<ApplicationId, List<ContainerStatus>> conts = new HashMap<ApplicationId, List<ContainerStatus>>();
    conts.put(app1.getApplicationId(), Collections.singletonList(containerStatus));
    nm1.nodeHeartbeat(conts, true);
    rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am3.getApplicationAttemptId());
    Assert.assertTrue(!attempt3.shouldCountTowardsMaxAttemptRetry());
    Assert.assertEquals(ContainerExitStatus.DISKS_FAILED, appState.getAttempt(am3.getApplicationAttemptId()).getAMContainerExitStatus());
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    MockAM am4 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
    RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
    // create second NM, and register to rm1
    MockNM nm2 = new MockNM("127.0.0.1:2234", 8000, rm1.getResourceTrackerService());
    nm2.registerNode();
    // nm1 heartbeats to report unhealthy
    // This will mimic ContainerExitStatus.ABORTED
    nm1.nodeHeartbeat(false);
    rm1.waitForState(am4.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am4.getApplicationAttemptId());
    Assert.assertTrue(!attempt4.shouldCountTowardsMaxAttemptRetry());
    Assert.assertEquals(ContainerExitStatus.ABORTED, appState.getAttempt(am4.getApplicationAttemptId()).getAMContainerExitStatus());
    // launch next AM in nm2
    MockAM am5 = rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 5, nm2);
    RMAppAttempt attempt5 = app1.getCurrentAppAttempt();
    // fail the AM normally
    nm2.nodeHeartbeat(am5.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am5.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler, am5.getApplicationAttemptId());
    // This is the only attempt expected to count towards the limit.
    Assert.assertTrue(attempt5.shouldCountTowardsMaxAttemptRetry());
    // AM should not be restarted.
    rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);
    Assert.assertEquals(5, app1.getAppAttempts().size());
    rm1.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) ArrayList(java.util.ArrayList) List(java.util.List) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 20 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestSchedulerHealth method testCapacitySchedulerAllocation.

/**
 * Checks the SchedulerHealth figures exposed by the CapacityScheduler after
 * tasks are scheduled on a single registered node. The test is skipped via
 * {@code assumeTrue} when the resource manager is not using a
 * CapacityScheduler.
 */
@Test
public void testCapacitySchedulerAllocation() throws Exception {
    setup();
    assumeTrue("This test is only supported on Capacity Scheduler", resourceManager.getResourceScheduler() instanceof CapacityScheduler);

    // Register the single node used by this test.
    String hostName = "host_0";
    NodeManager nodeManager = registerNode(hostName, 1234, 2345, NetworkTopology.DEFAULT_RACK, Resources.createResource(5 * 1024, 1));

    // Two request priorities.
    Priority priorityZero = Priority.newInstance(0);
    Priority priorityOne = Priority.newInstance(1);

    // Submit an application and attach the node to it.
    Application app = new Application("user_0", "default", resourceManager);
    app.submit();
    app.addNodeManager(hostName, 1234, nodeManager);

    // Priority 1 requests use the 1024 capability, priority 0 the 2 * 1024 one.
    Resource smallCapability = Resources.createResource(1024, 1);
    app.addResourceRequestSpec(priorityOne, smallCapability);
    Resource largeCapability = Resources.createResource(2 * 1024, 1);
    app.addResourceRequestSpec(priorityZero, largeCapability);

    Task firstTask = new Task(app, priorityOne, new String[] { hostName });
    app.addTask(firstTask);
    Task secondTask = new Task(app, priorityZero, new String[] { hostName });
    app.addTask(secondTask);

    // Push the requests to the scheduler, then heartbeat the node.
    app.schedule();
    nodeUpdate(nodeManager);

    CapacityScheduler capacityScheduler = (CapacityScheduler) resourceManager.getResourceScheduler();
    SchedulerHealth health = capacityScheduler.getSchedulerHealth();
    final String expectedNode = "host_0:1234";
    final String expectedQueue = "root.default";

    // After the first heartbeat: allocation count 1 with a 1024 resource,
    // aggregate allocation count 2.
    Assert.assertEquals(1, health.getAllocationCount().longValue());
    Assert.assertEquals(Resource.newInstance(1 * 1024, 1), health.getResourcesAllocated());
    Assert.assertEquals(2, health.getAggregateAllocationCount().longValue());
    Assert.assertEquals(expectedNode, health.getLastAllocationDetails().getNodeId().toString());
    Assert.assertEquals(expectedQueue, health.getLastAllocationDetails().getQueue());

    // One more priority-0 task, scheduled and heartbeated the same way.
    Task thirdTask = new Task(app, priorityZero, new String[] { hostName });
    app.addTask(thirdTask);
    app.schedule();
    nodeUpdate(nodeManager);

    // After the second heartbeat: allocation count still 1, now with a
    // 2 * 1024 resource, and the aggregate count has grown to 3.
    Assert.assertEquals(1, health.getAllocationCount().longValue());
    Assert.assertEquals(Resource.newInstance(2 * 1024, 1), health.getResourcesAllocated());
    Assert.assertEquals(3, health.getAggregateAllocationCount().longValue());
    Assert.assertEquals(expectedNode, health.getLastAllocationDetails().getNodeId().toString());
    Assert.assertEquals(expectedQueue, health.getLastAllocationDetails().getQueue());
}
Also used : NodeManager(org.apache.hadoop.yarn.server.resourcemanager.NodeManager) Task(org.apache.hadoop.yarn.server.resourcemanager.Task) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) Application(org.apache.hadoop.yarn.server.resourcemanager.Application) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Aggregations

CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)39 Test (org.junit.Test)21 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)15 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)12 IOException (java.io.IOException)9 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)8 ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)8 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)7 ClientResponse (com.sun.jersey.api.client.ClientResponse)6 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)6 FairScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler)6 AccessControlException (org.apache.hadoop.security.AccessControlException)5 NodeId (org.apache.hadoop.yarn.api.records.NodeId)5 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)5 ArrayList (java.util.ArrayList)4 Container (org.apache.hadoop.yarn.api.records.Container)4 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)4 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)4 JSONObject (org.codehaus.jettison.json.JSONObject)4