Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
From the class TestWorkPreservingRMRestart, method testDynamicQueueRecovery.
// Test work-preserving recovery of apps running under reservation.
// This involves:
// 1. Setting up a dynamic reservable queue,
// 2. Submitting an app to it,
// 3. Failing over RM,
// 4. Validating that the app is recovered post failover,
// 5. Checking that all running containers are recovered,
// 6. Verifying the scheduler state, e.g. attempt info,
// 7. Verifying the queue/user metrics for the dynamic reservable queue.
@Test(timeout = 30000)
public void testDynamicQueueRecovery() throws Exception {
conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
// 1. Set up dynamic reservable queue.
Configuration schedulerConf = getSchedulerDynamicConfiguration();
int containerMemory = 1024;
Resource containerResource = Resource.newInstance(containerMemory, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(schedulerConf);
rm1 = new MockRM(schedulerConf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
// 2. Run the plan follower to update the added node, then submit an app to the
// dynamic queue.
rm1.getRMContext().getReservationSystem().synchronizePlan(ReservationSystemTestUtil.reservationQ, true);
RMApp app1 = rm1.submitApp(200, "dynamicQApp", UserGroupInformation.getCurrentUser().getShortUserName(), null, ReservationSystemTestUtil.getReservationQueueName());
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// clear queue metrics
rm1.clearQueueMetrics(app1);
// 3. Fail over (restart) RM.
rm2 = new MockRM(schedulerConf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// 4. Validate app is recovered post failover.
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
// Wait for RM to settle down on recovering containers.
waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
assertTrue(launchedContainers.contains(amContainer.getContainerId()));
assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
// 5. Check that RMContainers are re-created and the container state is
// correct.
rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
// ********* check scheduler node state.*******
// 2 running containers.
Resource usedResources = Resources.multiply(containerResource, 2);
Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
assertTrue(schedulerNode1.isValidContainer(runningContainer.getContainerId()));
assertFalse(schedulerNode1.isValidContainer(completedContainer.getContainerId()));
// 2 launched containers, 1 completed container
assertEquals(2, schedulerNode1.getNumContainers());
assertEquals(Resources.subtract(nmResource, usedResources), schedulerNode1.getUnallocatedResource());
assertEquals(usedResources, schedulerNode1.getAllocatedResource());
Resource availableResources = Resources.subtract(nmResource, usedResources);
// 6. Verify the scheduler state, e.g. attempt info.
Map<ApplicationId, SchedulerApplication<SchedulerApplicationAttempt>> sa = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
SchedulerApplication<SchedulerApplicationAttempt> schedulerApp = sa.get(recoveredApp1.getApplicationId());
// 7. Verify the queue/user metrics for the dynamic reservable queue.
if (getSchedulerType() == SchedulerType.CAPACITY) {
checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
} else {
checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
}
// *********** check scheduler attempt state.********
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(amContainer.getContainerId())));
assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(runningContainer.getContainerId())));
assertEquals(schedulerAttempt.getCurrentConsumption(), usedResources);
// *********** check appSchedulingInfo state ***********
assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
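The final assertion above relies on how YARN encodes container IDs: the per-attempt sequence number lives in the lower 40 bits and the RM restart epoch is stored in the bits above them, so after one restart (epoch 1) the next container ID is (1L << 40) + 1. Below is a minimal standalone sketch of that encoding; it is not part of the test and assumes only the public ContainerId.newContainerId / getContainerId API.

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;

public class ContainerIdEpochSketch {
  public static void main(String[] args) {
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
    long epoch = 1L; // one RM restart has happened
    long firstIdAfterRestart = (epoch << 40) + 1L; // matches the assertion above
    ContainerId cid = ContainerId.newContainerId(attemptId, firstIdAfterRestart);
    // The lower 40 bits still carry the per-attempt sequence number (1 here).
    System.out.println(cid.getContainerId() & ((1L << 40) - 1));
  }
}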
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
From the class TestContainerResizing, method testSimpleDecreaseContainer.
@Test
public void testSimpleDecreaseContainer() throws Exception {
/**
* The application has a running container; try to decrease the container and
* check that the queue's usage and the container's resource are updated.
*/
MockRM rm1 = new MockRM() {
@Override
public RMNodeLabelsManager createNodeLabelManager() {
return mgr;
}
};
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB);
// app1 -> default queue
RMApp app1 = rm1.submitApp(3 * GB, "app", "user", null, "default");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
FiCaSchedulerApp app = TestUtils.getFiCaSchedulerApp(rm1, app1.getApplicationId());
checkUsedResource(rm1, "default", 3 * GB, null);
Assert.assertEquals(3 * GB, app.getAppAttemptResourceUsage().getUsed().getMemorySize());
ContainerId containerId1 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
sentRMContainerLaunched(rm1, containerId1);
// am1 asks to decrease its AM container from 3GB to 1GB
AllocateResponse response = am1.sendContainerResizingRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, containerId1, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(1 * GB), null)));
verifyContainerDecreased(response, containerId1, 1 * GB);
checkUsedResource(rm1, "default", 1 * GB, null);
Assert.assertEquals(1 * GB, app.getAppAttemptResourceUsage().getUsed().getMemorySize());
// Check that the decreased container was added to the RMNode
RMNodeImpl rmNode = (RMNodeImpl) rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
Collection<Container> decreasedContainers = rmNode.getToBeDecreasedContainers();
boolean rmNodeReceivedDecreaseContainer = false;
for (Container c : decreasedContainers) {
if (c.getId().equals(containerId1) && c.getResource().equals(Resources.createResource(1 * GB))) {
rmNodeReceivedDecreaseContainer = true;
}
}
Assert.assertTrue(rmNodeReceivedDecreaseContainer);
rm1.close();
}
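Outside the MockAM helper used above, an AM would send the same decrease through its allocate heartbeat. The following is a hedged sketch, assuming the AMRMClient#requestContainerUpdate API available in recent Hadoop releases; the mock test drives the protocol directly instead.

import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.util.resource.Resources;

final class DecreaseRequestSketch {
  static void requestDecrease(AMRMClient<AMRMClient.ContainerRequest> amClient,
      Container running) {
    // The container version must match the RM's view; the RM bumps it once
    // the update is applied, as the promote/demote test below also checks.
    UpdateContainerRequest decrease = UpdateContainerRequest.newInstance(
        running.getVersion(), running.getId(),
        ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(1024), null);
    amClient.requestContainerUpdate(running, decrease);
  }
}

The decreased size then reaches the NM through the node heartbeat response, which is why the test inspects RMNodeImpl#getToBeDecreasedContainers after the allocate call.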
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
From the class TestOpportunisticContainerAllocatorAMService, method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// A heartbeat from the other node on the same host should not result in the allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send the promotion request again... this should result in an update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send the promotion request again with an incorrect version...
// this should also result in an update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure that after the correct node heartbeats, we get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify metrics after the promotion to GUARANTEED:
// allocated cores/memory should have increased, available should have decreased
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify metrics after the demotion back to OPPORTUNISTIC:
// everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
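verifyMetrics(...) is a private helper of the test class, so its exact signature is not shown here. Below is a hedged sketch of what it plausibly asserts, assuming the argument order (availableMB, availableVCores, allocatedMB, allocatedVCores, allocatedContainers) implied by the values above: four 4 GB NMs give 16384 MB / 16 vcores, the 1 GB guaranteed AM container leaves 15360 MB / 15 vcores available, and promoting one opportunistic container moves another 1024 MB / 1 vcore into the allocated figures.

import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.junit.Assert;

final class MetricsCheckSketch {
  // Hypothetical stand-in for the test's private verifyMetrics helper.
  static void verifyMetrics(QueueMetrics metrics, long availableMB,
      long availableVCores, long allocatedMB, long allocatedVCores,
      long allocatedContainers) {
    Assert.assertEquals(availableMB, metrics.getAvailableMB());
    Assert.assertEquals(availableVCores, metrics.getAvailableVirtualCores());
    Assert.assertEquals(allocatedMB, metrics.getAllocatedMB());
    Assert.assertEquals(allocatedVCores, metrics.getAllocatedVirtualCores());
    Assert.assertEquals(allocatedContainers, metrics.getAllocatedContainers());
  }
}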
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
From the class TestApplicationLifetimeMonitor, method testApplicationLifetimeOnRMRestart.
@Test(timeout = 180000)
public void testApplicationLifetimeOnRMRestart() throws Exception {
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
nm1.nodeHeartbeat(true);
long appLifetime = 60L;
Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime);
RMApp app1 = rm1.submitApp(200, Priority.newInstance(0), timeouts);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// Re-start RM
MockRM rm2 = new MockRM(conf, memStore);
// Make sure the app has been unregistered from the old RM, else both RMs will
// trigger the Expire event
rm1.getRMContext().getRMAppLifetimeMonitor().unregisterApp(app1.getApplicationId(), ApplicationTimeoutType.LIFETIME);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
nm1.registerNode(Arrays.asList(amContainer, runningContainer), null);
// Wait for RM to settle down on recovering containers.
TestWorkPreservingRMRestart.waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
assertTrue(launchedContainers.contains(amContainer.getContainerId()));
assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
// Check that RMContainers are re-created and the container state is correct.
rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
// Re-register the attempt with rm2
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.ACCEPTED);
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am1.registerAppAttempt();
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.RUNNING);
// Wait for the app lifetime to elapse and the application to reach the KILLED state.
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.KILLED);
Assert.assertTrue("Application killed before lifetime value", recoveredApp1.getFinishTime() > (recoveredApp1.getSubmitTime() + appLifetime * 1000));
}
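For reference, outside the MockRM test harness a client would attach the same LIFETIME timeout when submitting the application. A minimal sketch, assuming the public ApplicationSubmissionContext#setApplicationTimeouts API that the RM lifetime monitor consumes; the lifetime is expressed in seconds from submission, which is why the test multiplies by 1000 when comparing against the finish time.

import java.util.Collections;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;

final class LifetimeSubmissionSketch {
  static void applyLifetime(ApplicationSubmissionContext ctx, long lifetimeSeconds) {
    // Once the lifetime elapses, the RM moves the app to KILLED, which is the
    // state the restarted RM waits for above.
    ctx.setApplicationTimeouts(
        Collections.singletonMap(ApplicationTimeoutType.LIFETIME, lifetimeSeconds));
  }
}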
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
From the class TestWorkPreservingRMRestart, method testSchedulerRecovery.
// Test that common scheduler state, including SchedulerAttempt, SchedulerNode,
// and AppSchedulingInfo, can be reconstructed from the container recovery
// reports sent on NM re-registration.
// Also test scheduler-specific behavior, i.e. queue recovery:
// CSQueue/FSQueue/FifoQueue recovery respectively.
// Test strategy: send 3 container recovery reports (AM container, running
// container, completed container) on NM re-registration, then check that the
// states of SchedulerAttempt, SchedulerNode, etc. are updated accordingly.
@Test(timeout = 20000)
public void testSchedulerRecovery() throws Exception {
conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
int containerMemory = 1024;
Resource containerResource = Resource.newInstance(containerMemory, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// clear queue metrics
rm1.clearQueueMetrics(app1);
// Re-start RM
rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
RMAppAttempt loadedAttempt1 = recoveredApp1.getCurrentAppAttempt();
NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
NMContainerStatus completedContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(amContainer, runningContainer, completedContainer), null);
// Wait for RM to settle down on recovering containers.
waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
assertTrue(launchedContainers.contains(amContainer.getContainerId()));
assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
// Check that RMContainers are re-created and the container state is correct.
rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForContainerToComplete(loadedAttempt1, completedContainer);
AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm2.getResourceScheduler();
SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId());
assertTrue("SchedulerNode#toString is not in expected format", schedulerNode1.toString().contains(schedulerNode1.getUnallocatedResource().toString()));
assertTrue("SchedulerNode#toString is not in expected format", schedulerNode1.toString().contains(schedulerNode1.getAllocatedResource().toString()));
// ********* check scheduler node state.*******
// 2 running containers.
Resource usedResources = Resources.multiply(containerResource, 2);
Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
assertTrue(schedulerNode1.isValidContainer(amContainer.getContainerId()));
assertTrue(schedulerNode1.isValidContainer(runningContainer.getContainerId()));
assertFalse(schedulerNode1.isValidContainer(completedContainer.getContainerId()));
// 2 launched containers, 1 completed container
assertEquals(2, schedulerNode1.getNumContainers());
assertEquals(Resources.subtract(nmResource, usedResources), schedulerNode1.getUnallocatedResource());
assertEquals(usedResources, schedulerNode1.getAllocatedResource());
Resource availableResources = Resources.subtract(nmResource, usedResources);
// ***** check queue state based on the underlying scheduler ********
Map<ApplicationId, SchedulerApplication> schedulerApps = ((AbstractYarnScheduler) rm2.getResourceScheduler()).getSchedulerApplications();
SchedulerApplication schedulerApp = schedulerApps.get(recoveredApp1.getApplicationId());
if (getSchedulerType() == SchedulerType.CAPACITY) {
checkCSQueue(rm2, schedulerApp, nmResource, nmResource, usedResources, 2);
} else {
checkFSQueue(rm2, schedulerApp, usedResources, availableResources);
}
// *********** check scheduler attempt state.********
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(amContainer.getContainerId())));
assertTrue(schedulerAttempt.getLiveContainers().contains(scheduler.getRMContainer(runningContainer.getContainerId())));
assertEquals(schedulerAttempt.getCurrentConsumption(), usedResources);
// *********** check appSchedulingInfo state ***********
assertEquals((1L << 40) + 1L, schedulerAttempt.getNewContainerId());
}
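waitForNumContainersToRecover is a helper of TestWorkPreservingRMRestart whose body is not shown above. The following is a hedged sketch of how such a wait could be written against the recovered scheduler attempt; the real helper may differ in its details.

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.junit.Assert;

final class RecoveryWaitSketch {
  static void waitForRecoveredContainers(MockRM rm, ApplicationAttemptId attemptId,
      int expected, long timeoutMs) throws InterruptedException {
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm.getResourceScheduler();
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
      SchedulerApplicationAttempt attempt = scheduler.getApplicationAttempt(attemptId);
      // Recovered containers show up as live containers on the attempt.
      if (attempt != null && attempt.getLiveContainers().size() >= expected) {
        return;
      }
      Thread.sleep(100);
    }
    Assert.fail("Expected " + expected + " containers to be recovered");
  }
}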