Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
From the class TestFairScheduler, method testFairShareAndWeightsInNestedUserQueueRule.
@Test
public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
  out.println("<?xml version=\"1.0\"?>");
  out.println("<allocations>");
  out.println("<queue name=\"parentq\" type=\"parent\">");
  out.println("<minResources>1024mb,0vcores</minResources>");
  out.println("</queue>");
  out.println("<queuePlacementPolicy>");
  out.println("<rule name=\"nestedUserQueue\">");
  out.println(" <rule name=\"specified\" create=\"false\" />");
  out.println("</rule>");
  out.println("<rule name=\"default\" />");
  out.println("</queuePlacementPolicy>");
  out.println("</allocations>");
  out.close();
  RMApp rmApp1 = new MockRMApp(0, 0, RMAppState.NEW);
  RMApp rmApp2 = new MockRMApp(1, 1, RMAppState.NEW);
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());
  int capacity = 16 * 1024;
  // Create a node with 16 GB of memory.
  RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(capacity),
      1, "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
  scheduler.handle(nodeEvent1);
  // user1 and user2 submit their apps to parentq, creating user queues.
  createSchedulingRequest(10 * 1024, "root.parentq", "user1");
  createSchedulingRequest(10 * 1024, "root.parentq", "user2");
  // user3 submits an app to the default queue.
  createSchedulingRequest(10 * 1024, "root.default", "user3");
  scheduler.update();
  scheduler.getQueueManager().getRootQueue()
      .setSteadyFairShare(scheduler.getClusterResource());
  scheduler.getQueueManager().getRootQueue().recomputeSteadyShares();
  Collection<FSLeafQueue> leafQueues =
      scheduler.getQueueManager().getLeafQueues();
  for (FSLeafQueue leaf : leafQueues) {
    if (leaf.getName().equals("root.parentq.user1")
        || leaf.getName().equals("root.parentq.user2")) {
      // Assert that the fair share is 1/4 of node1's capacity.
      assertEquals(capacity / 4, leaf.getFairShare().getMemorySize());
      // Assert that the steady fair share is 1/4 of node1's capacity.
      assertEquals(capacity / 4, leaf.getSteadyFairShare().getMemorySize());
      // Assert that the weights are equal for both user queues.
      assertEquals(1.0, leaf.getWeights().getWeight(ResourceType.MEMORY), 0);
    }
  }
}
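The capacity / 4 expectation follows from equal-weight fair sharing: root splits its 16 GB evenly between root.parentq and root.default, and parentq splits its half evenly between the two user queues. A minimal sketch of the arithmetic, assuming all queue weights are left at the default of 1.0 (which holds in this test):

// Hedged sketch of the expected fair-share arithmetic; illustrative only.
int capacity = 16 * 1024;               // node memory in MB
int topLevelShare = capacity / 2;       // root.parentq vs. root.default
int userQueueShare = topLevelShare / 2; // user1 vs. user2 under parentq
// userQueueShare == capacity / 4 == 4096 MB, matching the assertions above.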
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
From the class TestRMWebServicesSchedulerActivities, method testAssignWithoutAvailableResource.
@Test
public void testAssignWithoutAvailableResource() throws Exception {
  // Start the RM so that it accepts app submissions.
  rm.start();
  MockNM nm = new MockNM("127.0.0.1:1234", 1 * 1024,
      rm.getResourceTrackerService());
  nm.registerNode();
  try {
    RMApp app1 = rm.submitApp(1024, "app1", "user1", null, "b1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm);
    am1.allocate(Arrays.asList(
        ResourceRequest.newInstance(Priority.UNDEFINED, "127.0.0.1",
            Resources.createResource(1024), 10),
        ResourceRequest.newInstance(Priority.UNDEFINED, "/default-rack",
            Resources.createResource(1024), 10),
        ResourceRequest.newInstance(Priority.UNDEFINED, "*",
            Resources.createResource(1024), 10)), null);
    // Fetch the scheduler activities as JSON.
    WebResource r = resource();
    MultivaluedMapImpl params = new MultivaluedMapImpl();
    params.add("nodeId", "127.0.0.1");
    ClientResponse response = r.path("ws").path("v1").path("cluster")
        .path("scheduler/activities").queryParams(params)
        .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
    assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8,
        response.getType().toString());
    JSONObject json = response.getEntity(JSONObject.class);
    // Trigger a heartbeat and give the scheduler a cycle to record activity.
    nm.nodeHeartbeat(true);
    Thread.sleep(1000);
    // Fetch the JSON again after the heartbeat.
    response = r.path("ws").path("v1").path("cluster")
        .path("scheduler/activities").queryParams(params)
        .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
    assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8,
        response.getType().toString());
    json = response.getEntity(JSONObject.class);
    // The 1 GB node is fully occupied by the AM, so nothing can be allocated.
    verifyNumberOfAllocations(json, 0);
  } finally {
    rm.stop();
  }
}
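verifyNumberOfAllocations is a helper defined elsewhere in this test class. A rough sketch of what such a check could look like, assuming the response nests allocations under an "activities" object; the field names and the single-object-vs-array handling are assumptions about the JSON shape, not the verified wire format (JSONArray and JSONException come from the same org.codehaus.jettison package as JSONObject):

// Hypothetical sketch of the helper; not the actual implementation.
private void verifyNumberOfAllocations(JSONObject json, int expected)
    throws JSONException {
  JSONObject activities = json.getJSONObject("activities");
  if (expected == 0) {
    // With no available resource on the node, no allocation entries appear.
    assertFalse(activities.has("allocations"));
    return;
  }
  Object allocations = activities.get("allocations");
  // Jettison returns a bare object rather than an array when there is
  // exactly one element.
  int actual = (allocations instanceof JSONArray)
      ? ((JSONArray) allocations).length() : 1;
  assertEquals(expected, actual);
}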
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
From the class TestWorkPreservingRMRestart, method testDynamicQueueRecovery.
// Test work-preserving recovery of apps running under reservation.
// This involves:
// 1. Setting up a dynamic reservable queue,
// 2. Submitting an app to it,
// 3. Failing over the RM,
// 4. Validating that the app is recovered post failover,
// 5. Checking that all running containers are recovered,
// 6. Verifying scheduler state such as attempt info,
// 7. Verifying the queue/user metrics for the dynamic reservable queue.
@Test(timeout = 30000)
public void testDynamicQueueRecovery() throws Exception {
  conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
  conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS,
      DominantResourceCalculator.class.getName());
  // 1. Set up the dynamic reservable queue.
  Configuration schedulerConf = getSchedulerDynamicConfiguration();
  int containerMemory = 1024;
  Resource containerResource = Resource.newInstance(containerMemory, 1);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(schedulerConf);
  rm1 = new MockRM(schedulerConf, memStore);
  rm1.start();
  MockNM nm1 = new MockNM("127.0.0.1:1234", 8192,
      rm1.getResourceTrackerService());
  nm1.registerNode();
  // 2. Run the plan follower to pick up the added node, then submit an app
  // to the dynamic queue.
  rm1.getRMContext().getReservationSystem()
      .synchronizePlan(ReservationSystemTestUtil.reservationQ, true);
  RMApp app1 = rm1.submitApp(200, "dynamicQApp",
      UserGroupInformation.getCurrentUser().getShortUserName(), null,
      ReservationSystemTestUtil.getReservationQueueName());
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
  // Clear the queue metrics.
  rm1.clearQueueMetrics(app1);
  // 3. Fail over (restart) the RM.
  rm2 = new MockRM(schedulerConf, memStore);
  rm2.start();
  nm1.setResourceTrackerService(rm2.getResourceTrackerService());
  // 4. Validate that the app is recovered post failover.
  RMApp recoveredApp1 =
      rm2.getRMContext().getRMApps().get(app1.getApplicationId());
  RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
  NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(
      am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
  NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(
      am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
  NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(
      am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
  nm1.registerNode(
      Arrays.asList(amContainer, runningContainer, completedContainer), null);
  // Wait for the RM to settle down on recovering containers.
  waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
  Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext()
      .getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
  assertTrue(launchedContainers.contains(amContainer.getContainerId()));
  assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
  // 5. Check that the RMContainers are re-created and in the correct state.
  rm2.waitForState(nm1, amContainer.getContainerId(),
      RMContainerState.RUNNING);
  rm2.waitForState(nm1, runningContainer.getContainerId(),
      RMContainerState.RUNNING);
  rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
  AbstractYarnScheduler scheduler =
      (AbstractYarnScheduler) rm2.getResourceScheduler();
  SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
  // ********* Check scheduler node state. *********
  // 2 running containers.
  Resource usedResources = Resources.multiply(containerResource, 2);
  Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
  assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
  assertTrue(
      schedulerNode1.isValidContainer(runningContainer.getContainerId()));
  assertFalse(
      schedulerNode1.isValidContainer(completedContainer.getContainerId()));
  // 2 launched containers, 1 completed container.
  assertEquals(2, schedulerNode1.getNumContainers());
  assertEquals(Resources.subtract(nmResource, usedResources),
      schedulerNode1.getUnallocatedResource());
  assertEquals(usedResources, schedulerNode1.getAllocatedResource());
  Resource availableResources = Resources.subtract(nmResource, usedResources);
  // 6. Verify scheduler state such as attempt info.
  Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>> sa =
      ((AbstractYarnScheduler) rm2.getResourceScheduler())
          .getSchedulerApplications();
  SchedulerApplication<SchedulerApplicationAttempt> schedulerApp =
      sa.get(recoveredApp1.getApplicationId());
  // 7. Verify the queue/user metrics for the dynamic reservable queue.
  if (getSchedulerType() == SchedulerType.CAPACITY) {
    checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
  } else {
    checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
  }
  // ********* Check scheduler attempt state. *********
  SchedulerApplicationAttempt schedulerAttempt =
      schedulerApp.getCurrentAppAttempt();
  assertTrue(schedulerAttempt.getLiveContainers()
      .contains(scheduler.getRMContainer(amContainer.getContainerId())));
  assertTrue(schedulerAttempt.getLiveContainers()
      .contains(scheduler.getRMContainer(runningContainer.getContainerId())));
  assertEquals(usedResources, schedulerAttempt.getCurrentConsumption());
  // ********* Check appSchedulingInfo state. *********
  // Epoch 1 after one restart, container sequence number 1 within the epoch.
  assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
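The final assertion decodes as follows: ContainerId reserves the low 40 bits of the id for the per-epoch sequence number and stores the RM restart epoch in the bits above, so after one restart (epoch 1) the first newly issued container id is (1 << 40) + 1. A small illustrative sketch of that encoding; the helper below is an assumption for exposition, not YARN's actual factory method:

// Illustrative only: compose an epoch-qualified container id.
static long epochContainerId(long epoch, long sequence) {
  return (epoch << 40) | sequence; // low 40 bits: per-epoch sequence number
}
// epochContainerId(1, 1) == (1L << 40) + 1L, the value asserted above.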
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
From the class TestWorkPreservingRMRestart, method testCapacityLeafQueueBecomesParentOnRecovery.
// Test the behavior of an app whose queue changes from leaf to parent
// during recovery. The test does the following:
// 1. Add an app to QueueB and start the attempt.
// 2. Add 2 sub-queues (QueueB1 and QueueB2) to QueueB, then restart the RM,
//    once with the fail-fast config set to false and once set to true.
// 3. Verify that the app was killed if fail fast is false.
// 4. Verify that a QueueException was thrown if fail fast is true.
@Test(timeout = 30000)
public void testCapacityLeafQueueBecomesParentOnRecovery() throws Exception {
  if (getSchedulerType() != SchedulerType.CAPACITY) {
    return;
  }
  conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
  conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS,
      DominantResourceCalculator.class.getName());
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration(conf);
  setupQueueConfiguration(csConf);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(csConf);
  rm1 = new MockRM(csConf, memStore);
  rm1.start();
  MockNM nm = new MockNM("127.1.1.1:4321", 8192,
      rm1.getResourceTrackerService());
  nm.registerNode();
  // Submit an app to QueueB.
  RMApp app = rm1.submitApp(1024, "app", USER_2, null, B);
  MockRM.launchAndRegisterAM(app, rm1, nm);
  assertEquals(YarnApplicationState.RUNNING,
      rm1.getApplicationReport(app.getApplicationId())
          .getYarnApplicationState());
  // Take a copy of the state store so that it can be reset to this state.
  RMState state = memStore.loadState();
  // Change the scheduler config so that child queues are added to QueueB.
  csConf = new CapacitySchedulerConfiguration(conf);
  setupQueueConfigurationChildOfB(csConf);
  String diags = "Application killed on recovery as it was submitted to "
      + "queue QueueB which is no longer a leaf queue after restart.";
  verifyAppRecoveryWithWrongQueueConfig(csConf, app, diags, memStore, state);
}
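The fail-fast toggle mentioned in the preamble is controlled by an RM configuration flag, and verifyAppRecoveryWithWrongQueueConfig (a helper in this test class) presumably replays the saved state twice with the flag flipped. YarnConfiguration.RM_FAIL_FAST is a real key; the helper's internals sketched here are an assumption:

// Assumed shape of the two recovery runs driven by the helper:
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, false);
// -> recovery proceeds and the app is killed with the diagnostics above.
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, true);
// -> recovery aborts and the scheduler surfaces a QueueException instead.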
Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp in project hadoop by apache.
From the class TestWorkPreservingRMRestart, method testAppFailedToRenewTokenOnRecovery.
@Test(timeout = 30000)
public void testAppFailedToRenewTokenOnRecovery() throws Exception {
  conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
      "kerberos");
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
  UserGroupInformation.setConfiguration(conf);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  MockRM rm1 = new TestSecurityMockRM(conf, memStore);
  rm1.start();
  MockNM nm1 = new MockNM("127.0.0.1:1234", 8192,
      rm1.getResourceTrackerService());
  nm1.registerNode();
  RMApp app1 = rm1.submitApp(200);
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
  // The restarted RM gets a token renewer that always fails, so recovered
  // apps cannot renew their tokens.
  MockRM rm2 = new TestSecurityMockRM(conf, memStore) {
    @Override
    protected DelegationTokenRenewer createDelegationTokenRenewer() {
      return new DelegationTokenRenewer() {
        @Override
        public void addApplicationSync(ApplicationId applicationId,
            Credentials ts, boolean shouldCancelAtEnd, String user)
            throws IOException {
          throw new IOException("Token renew failed !!");
        }
      };
    }
  };
  nm1.setResourceTrackerService(rm2.getResourceTrackerService());
  rm2.start();
  NMContainerStatus containerStatus = TestRMRestart.createNMContainerStatus(
      am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
  nm1.registerNode(Arrays.asList(containerStatus), null);
  // The AM re-registers with the restarted RM.
  rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
  am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
  am1.registerAppAttempt(true);
  rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
  // Because the token could not be renewed, the AM may crash; simulate its
  // container completing. With max attempts set to 1, the app fails.
  nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
  rm2.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
  rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
}
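A natural follow-up check, not part of the original test: assert that the stubbed renewer's failure surfaces in the recovered app's diagnostics. RMApp#getDiagnostics is a real accessor, but whether the renewer's message is appended verbatim to the diagnostics is an assumption:

// Hypothetical extra assertion; message propagation is assumed.
RMApp failedApp =
    rm2.getRMContext().getRMApps().get(app1.getApplicationId());
assertTrue(failedApp.getDiagnostics().toString()
    .contains("Token renew failed"));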