Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
From the class FiCaSchedulerApp, method findNodeToUnreserve:
@VisibleForTesting
public RMContainer findNodeToUnreserve(Resource clusterResource,
    FiCaSchedulerNode node, SchedulerRequestKey schedulerKey,
    Resource minimumUnreservedResource) {
  try {
    readLock.lock();
    // need to unreserve some other container first
    NodeId idToUnreserve = getNodeIdToUnreserve(schedulerKey,
        minimumUnreservedResource, rc, clusterResource);
    if (idToUnreserve == null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("checked to see if could unreserve for app but nothing "
            + "reserved that matches for this app");
      }
      return null;
    }
    FiCaSchedulerNode nodeToUnreserve =
        ((CapacityScheduler) scheduler).getNode(idToUnreserve);
    if (nodeToUnreserve == null) {
      LOG.error("node to unreserve doesn't exist, nodeid: " + idToUnreserve);
      return null;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("unreserving for app: " + getApplicationId() + " on nodeId: "
          + idToUnreserve
          + " in order to replace reserved application and place it on node: "
          + node.getNodeID() + " needing: " + minimumUnreservedResource);
    }
    // headroom
    Resources.addTo(getHeadroom(),
        nodeToUnreserve.getReservedContainer().getReservedResource());
    return nodeToUnreserve.getReservedContainer();
  } finally {
    readLock.unlock();
  }
}
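The lookup runs under the application's read lock and returns null both when nothing matching is reserved and when the reserved node has since disappeared, so callers must check the result before acting on it. Below is a minimal, self-contained sketch of the same read-lock guard idiom using plain java.util.concurrent rather than Hadoop classes; ReservationLookup and reservedNodeId are illustrative names, and the lock is acquired before the try block in the conventional ordering.
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class ReservationLookup {
  private final ReentrantReadWriteLock.ReadLock readLock =
      new ReentrantReadWriteLock().readLock();
  private volatile String reservedNodeId; // stand-in for the app's reservation state

  /** Returns the reserved node id, or null when nothing is reserved. */
  public String findReservedNode() {
    readLock.lock();         // acquire before entering try, release in finally
    try {
      return reservedNodeId; // read-only access while the lock is held
    } finally {
      readLock.unlock();
    }
  }
}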
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
From the class CapacityReservationSystem, method reinitialize:
@Override
public void reinitialize(Configuration conf, RMContext rmContext)
    throws YarnException {
  // Validate if the scheduler is capacity based
  ResourceScheduler scheduler = rmContext.getScheduler();
  if (!(scheduler instanceof CapacityScheduler)) {
    throw new YarnRuntimeException("Class " + scheduler.getClass().getCanonicalName()
        + " not instance of " + CapacityScheduler.class.getCanonicalName());
  }
  capScheduler = (CapacityScheduler) scheduler;
  this.conf = conf;
  super.reinitialize(conf, rmContext);
}
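Here reinitialize does nothing capacity-specific beyond verifying that the scheduler handed over by the RMContext really is a CapacityScheduler before keeping a typed reference to it. The following is a hedged, standalone sketch of that check-then-cast pattern; the Scheduler and CapacityBasedScheduler types are hypothetical placeholders, not YARN classes.
// Hypothetical types, used only to illustrate the check-then-cast pattern.
interface Scheduler { }

class CapacityBasedScheduler implements Scheduler { }

class ReservationSystemSketch {
  private CapacityBasedScheduler capScheduler;

  void reinitialize(Scheduler scheduler) {
    // Fail fast with a descriptive message if the wrong scheduler is configured.
    if (!(scheduler instanceof CapacityBasedScheduler)) {
      throw new IllegalStateException("Class " + scheduler.getClass().getCanonicalName()
          + " not instance of " + CapacityBasedScheduler.class.getCanonicalName());
    }
    capScheduler = (CapacityBasedScheduler) scheduler;
  }
}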
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
From the class TestRMRestart, method testRMRestartAfterPreemption:
@Test(timeout = 120000)
public void testRMRestartAfterPreemption() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
  if (!getSchedulerType().equals(SchedulerType.CAPACITY)) {
    return;
  }
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  // start RM
  MockRM rm1 = new MockRM(conf, memStore);
  rm1.start();
  CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
  MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
  nm1.registerNode();
  int CONTAINER_MEMORY = 1024;
  // create app and launch the AM
  RMApp app0 = rm1.submitApp(CONTAINER_MEMORY);
  MockAM am0 = MockRM.launchAM(app0, rm1, nm1);
  nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
  rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
  TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(cs,
      am0.getApplicationAttemptId());
  for (int i = 0; i < 4; i++) {
    am0 = MockRM.launchAM(app0, rm1, nm1);
    am0.registerAppAttempt();
    // get scheduler app
    FiCaSchedulerApp schedulerAppAttempt = cs.getSchedulerApplications()
        .get(app0.getApplicationId()).getCurrentAppAttempt();
    // kill app0-attempt
    cs.markContainerForKillable(schedulerAppAttempt.getRMContainer(
        app0.getCurrentAppAttempt().getMasterContainer().getId()));
    rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(cs,
        am0.getApplicationAttemptId());
  }
  am0 = MockRM.launchAM(app0, rm1, nm1);
  am0.registerAppAttempt();
  rm1.killApp(app0.getApplicationId());
  rm1.waitForState(app0.getCurrentAppAttempt().getAppAttemptId(),
      RMAppAttemptState.KILLED);
  MockRM rm2 = null;
  // start RM2
  try {
    rm2 = new MockRM(conf, memStore);
    rm2.start();
    Assert.assertTrue("RM start successfully", true);
  } catch (Exception e) {
    LOG.debug("Exception on start", e);
    Assert.fail("RM should start with out any issue");
  } finally {
    rm1.stop();
  }
}
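The restart test drives nearly every step through waitForState style polling before moving on, then restarts a second RM against the same MemoryRMStateStore. As a minimal sketch of the kind of bounded wait helper such tests depend on (an assumption-level illustration, not the MockRM implementation):
import java.util.function.BooleanSupplier;

final class WaitUtil {
  private WaitUtil() { }

  /** Polls the condition until it holds, failing if the timeout elapses first. */
  static void waitFor(BooleanSupplier condition, long timeoutMs, long intervalMs)
      throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!condition.getAsBoolean()) {
      if (System.currentTimeMillis() > deadline) {
        throw new AssertionError("Condition not met within " + timeoutMs + " ms");
      }
      Thread.sleep(intervalMs);
    }
  }
}
A caller would express "wait for the attempt to fail" as something like WaitUtil.waitFor(() -> currentAttemptState() == FAILED, 120000, 100), which is the shape of the waits the test performs after each preemption.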
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
From the class TestRM, method testNMTokenSentForNormalContainer:
// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
@Test(timeout = 20000)
public void testNMTokenSentForNormalContainer() throws Exception {
  conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName());
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("h1:1234", 5120);
  RMApp app = rm.submitApp(2000);
  RMAppAttempt attempt = app.getCurrentAppAttempt();
  // Call getNewContainerId to increase container Id so that the AM container
  // Id doesn't equal to one.
  CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
  cs.getApplicationAttempt(attempt.getAppAttemptId()).getNewContainerId();
  MockAM am = MockRM.launchAM(app, rm, nm1);
  // am container Id not equal to 1.
  Assert.assertTrue(attempt.getMasterContainer().getId().getContainerId() != 1);
  // NMSecretManager doesn't record the node on which the am is allocated.
  Assert.assertFalse(rm.getRMContext().getNMTokenSecretManager()
      .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(), nm1.getNodeId()));
  am.registerAppAttempt();
  rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
  int NUM_CONTAINERS = 1;
  List<Container> containers = new ArrayList<Container>();
  // nmTokens keeps track of all the nmTokens issued in the allocate call.
  List<NMToken> expectedNMTokens = new ArrayList<NMToken>();
  // am1 allocate 1 container on nm1.
  while (true) {
    AllocateResponse response = am.allocate("127.0.0.1", 2000, NUM_CONTAINERS,
        new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    containers.addAll(response.getAllocatedContainers());
    expectedNMTokens.addAll(response.getNMTokens());
    if (containers.size() == NUM_CONTAINERS) {
      break;
    }
    Thread.sleep(200);
    System.out.println("Waiting for container to be allocated.");
  }
  NodeId nodeId = expectedNMTokens.get(0).getNodeId();
  // NMToken is sent for the allocated container.
  Assert.assertEquals(nm1.getNodeId(), nodeId);
}
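The assertions hinge on when a token for a given (attempt, node) pair becomes known: the AM's own node is not recorded at launch, and only the allocate response for the normal container carries the NM token for nm1. The following is a hypothetical, self-contained illustration of that bookkeeping, not the YARN NMTokenSecretManager:
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/** Hypothetical per-attempt NM token bookkeeping, for illustration only. */
class NMTokenTracker {
  private final Map<String, Set<String>> nodesWithTokenByAttempt = new HashMap<>();

  /** Record that an allocate response issued a token to this attempt for this node. */
  void recordToken(String attemptId, String nodeId) {
    nodesWithTokenByAttempt.computeIfAbsent(attemptId, k -> new HashSet<>()).add(nodeId);
  }

  /** True only after a token for this attempt/node pair has been issued. */
  boolean isTokenPresent(String attemptId, String nodeId) {
    return nodesWithTokenByAttempt
        .getOrDefault(attemptId, new HashSet<>())
        .contains(nodeId);
  }
}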
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.
From the class TestRMAdminService, method testAdminRefreshQueuesWithFileSystemBasedConfigurationProvider:
@Test
public void testAdminRefreshQueuesWithFileSystemBasedConfigurationProvider()
    throws IOException, YarnException {
  configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS,
      "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
  // upload default configurations
  uploadDefaultConfiguration();
  try {
    rm = new MockRM(configuration);
    rm.init(configuration);
    rm.start();
  } catch (Exception ex) {
    fail("Should not get any exceptions");
  }
  CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler();
  int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications();
  CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
  csConf.set(CapacitySchedulerConfiguration.MAXIMUM_SYSTEM_APPLICATIONS, "5000");
  uploadConfiguration(csConf, "capacity-scheduler.xml");
  rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance());
  int maxAppsAfter = cs.getConfiguration().getMaximumSystemApplications();
  Assert.assertEquals(maxAppsAfter, 5000);
  Assert.assertTrue(maxAppsAfter != maxAppsBefore);
}
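uploadConfiguration is test scaffolding; the underlying idea is that the overridden CapacitySchedulerConfiguration is serialized back to Hadoop-style XML and placed where the FileSystemBasedConfigurationProvider reads capacity-scheduler.xml from. A small sketch of that serialization step, assuming an output path of /tmp/capacity-scheduler.xml purely for illustration:
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;

class WriteCapacitySchedulerXml {
  public static void main(String[] args) throws Exception {
    Configuration csConf = new Configuration(false);
    // Same property the test overrides via CapacitySchedulerConfiguration.MAXIMUM_SYSTEM_APPLICATIONS.
    csConf.set("yarn.scheduler.capacity.maximum-applications", "5000");
    try (OutputStream out = new FileOutputStream("/tmp/capacity-scheduler.xml")) {
      csConf.writeXml(out); // Configuration emits the standard <configuration> XML format
    }
  }
}
On a live cluster the same reload is typically triggered with the yarn rmadmin -refreshQueues admin command once the new file is in place, which is what rm.adminService.refreshQueues exercises directly in the test.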