use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestApplicationLimitsByPartition method testHeadroom.
@Test
public void testHeadroom() throws Exception {
/*
* Test Case: Verify Headroom calculated is sum of headrooms for each
* partition requested. So submit a app with requests for default partition
* and 'x' partition, so the total headroom for the user should be sum of
* the head room for both labels.
*/
simpleNodeLabelMappingToManager();
CapacitySchedulerConfiguration csConf = (CapacitySchedulerConfiguration) TestUtils.getComplexConfigurationWithQueueLabels(conf);
final String A1 = CapacitySchedulerConfiguration.ROOT + ".a" + ".a1";
final String B2 = CapacitySchedulerConfiguration.ROOT + ".b" + ".b2";
csConf.setUserLimit(A1, 25);
csConf.setUserLimit(B2, 25);
YarnConfiguration conf = new YarnConfiguration();
CapacitySchedulerContext csContext = mock(CapacitySchedulerContext.class);
when(csContext.getConfiguration()).thenReturn(csConf);
when(csContext.getConf()).thenReturn(conf);
when(csContext.getMinimumResourceCapability()).thenReturn(Resources.createResource(GB));
when(csContext.getMaximumResourceCapability()).thenReturn(Resources.createResource(16 * GB));
when(csContext.getResourceCalculator()).thenReturn(resourceCalculator);
RMContext rmContext = TestUtils.getMockRMContext();
RMContext spyRMContext = spy(rmContext);
when(spyRMContext.getNodeLabelManager()).thenReturn(mgr);
when(csContext.getRMContext()).thenReturn(spyRMContext);
mgr.activateNode(NodeId.newInstance("h0", 0), // default Label
Resource.newInstance(160 * GB, 16));
mgr.activateNode(NodeId.newInstance("h1", 0), // label x
Resource.newInstance(160 * GB, 16));
mgr.activateNode(NodeId.newInstance("h2", 0), // label y
Resource.newInstance(160 * GB, 16));
// Say cluster has 100 nodes of 16G each
Resource clusterResource = Resources.createResource(160 * GB);
when(csContext.getClusterResource()).thenReturn(clusterResource);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue rootQueue = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, "root", queues, queues, TestUtils.spyHook);
ResourceUsage queueResUsage = rootQueue.getQueueResourceUsage();
when(csContext.getClusterResourceUsage()).thenReturn(queueResUsage);
// Manipulate queue 'a'
LeafQueue queue = TestLeafQueue.stubLeafQueue((LeafQueue) queues.get("b2"));
queue.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
String rack_0 = "rack_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode("h0", rack_0, 0, 160 * GB);
FiCaSchedulerNode node_1 = TestUtils.getMockNode("h1", rack_0, 0, 160 * GB);
final String user_0 = "user_0";
final String user_1 = "user_1";
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
ConcurrentMap<ApplicationId, RMApp> spyApps = spy(new ConcurrentHashMap<ApplicationId, RMApp>());
RMApp rmApp = mock(RMApp.class);
ResourceRequest amResourceRequest = mock(ResourceRequest.class);
Resource amResource = Resources.createResource(0, 0);
when(amResourceRequest.getCapability()).thenReturn(amResource);
when(rmApp.getAMResourceRequest()).thenReturn(amResourceRequest);
Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId) Matchers.any());
when(spyRMContext.getRMApps()).thenReturn(spyApps);
RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class);
when(rmApp.getRMAppAttempt((ApplicationAttemptId) Matchers.any())).thenReturn(rmAppAttempt);
when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt);
Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId) Matchers.any());
Mockito.doReturn(true).when(spyApps).containsKey((ApplicationId) Matchers.any());
Priority priority_1 = TestUtils.createMockPriority(1);
// Submit first application with some resource-requests from user_0,
// and check headroom
final ApplicationAttemptId appAttemptId_0_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0_0 = new FiCaSchedulerApp(appAttemptId_0_0, user_0, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_0_0, user_0);
List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
app_0_0_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_0_0.updateResourceRequests(app_0_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
//head room = queue capacity = 50 % 90% 160 GB * 0.25 (UL)
Resource expectedHeadroom = Resources.createResource((int) (0.5 * 0.9 * 160 * 0.25) * GB, 1);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
// Submit second application from user_0, check headroom
final ApplicationAttemptId appAttemptId_0_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_0_1 = new FiCaSchedulerApp(appAttemptId_0_1, user_0, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_0_1, user_0);
List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
app_0_1_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_0_1.updateResourceRequests(app_0_1_requests);
app_0_1_requests.clear();
app_0_1_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory, "y"));
app_0_1.updateResourceRequests(app_0_1_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
queue.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
// no change
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
//head room for default label + head room for y partition
//head room for y partition = 100% 50%(b queue capacity ) * 160 * GB
Resource expectedHeadroomWithReqInY = Resources.add(Resources.createResource((int) (0.25 * 0.5 * 160) * GB, 1), expectedHeadroom);
assertEquals(expectedHeadroomWithReqInY, app_0_1.getHeadroom());
// Submit first application from user_1, check for new headroom
final ApplicationAttemptId appAttemptId_1_0 = TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_1_0 = new FiCaSchedulerApp(appAttemptId_1_0, user_1, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_1_0, user_1);
List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
app_1_0_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_1_0.updateResourceRequests(app_1_0_requests);
app_1_0_requests.clear();
app_1_0_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory, "y"));
app_1_0.updateResourceRequests(app_1_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
//head room = queue capacity = (50 % 90% 160 GB)/2 (for 2 users)
expectedHeadroom = Resources.createResource((int) (0.5 * 0.9 * 160 * 0.25) * GB, 1);
//head room for default label + head room for y partition
//head room for y partition = 100% 50%(b queue capacity ) * 160 * GB
expectedHeadroomWithReqInY = Resources.add(Resources.createResource((int) (0.25 * 0.5 * 160) * GB, 1), expectedHeadroom);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
assertEquals(expectedHeadroomWithReqInY, app_0_1.getHeadroom());
assertEquals(expectedHeadroomWithReqInY, app_1_0.getHeadroom());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestLeafQueue method testReservation.
@Test
public void testReservation() throws Exception {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
//unset maxCapacity
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
final String user_1 = "user_1";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_1, user_1);
// Setup some nodes
String host_0 = "127.0.0.1";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0);
final int numNodes = 2;
Resource clusterResource = Resources.createResource(numNodes * (4 * GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priority = TestUtils.createMockPriority(1);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority, recordFactory)));
app_1.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 4 * GB, 1, true, priority, recordFactory)));
// Start testing...
// Only 1 container
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(1 * GB, a.getUsedResources().getMemorySize());
assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
assertEquals(0 * GB, a.getMetrics().getAvailableMB());
// Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also
// you can get one container more than user-limit
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Now, reservation should kick in for app_1
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(6 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(4 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Now free 1 container from app_0 i.e. 1G
RMContainer rmContainer = app_0.getLiveContainers().iterator().next();
a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(1 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(4 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
// Now finish another container from app_0 and fulfill the reservation
rmContainer = app_0.getLiveContainers().iterator().next();
a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(4 * GB, a.getUsedResources().getMemorySize());
assertEquals(0 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(4 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(4 * GB, a.getMetrics().getAllocatedMB());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestLeafQueue method testLocalityDelaySkipsApplication.
@Test
public void testLocalityDelaySkipsApplication() throws Exception {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// User
String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes and racks
String host_0 = "127.0.0.1";
String rack_0 = "rack_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, rack_0, 0, 8 * GB);
String host_1 = "127.0.0.2";
String rack_1 = "rack_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, rack_1, 0, 8 * GB);
String host_2 = "127.0.0.3";
String rack_2 = "rack_2";
FiCaSchedulerNode node_2 = TestUtils.getMockNode(host_2, rack_2, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1, node_2.getNodeID(), node_2);
final int numNodes = 3;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests and submit
// App0 has node local request for host_0/host_1, and app1 has node local
// request for host2.
Priority priority = TestUtils.createMockPriority(1);
SchedulerRequestKey schedulerKey = toSchedulerKey(priority);
List<ResourceRequest> app_0_requests_0 = new ArrayList<ResourceRequest>();
app_0_requests_0.add(TestUtils.createResourceRequest(host_0, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(TestUtils.createResourceRequest(rack_0, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(TestUtils.createResourceRequest(host_1, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(// one extra
TestUtils.createResourceRequest(// one extra
ResourceRequest.ANY, // one extra
1 * GB, // one extra
3, true, priority, recordFactory));
app_0.updateResourceRequests(app_0_requests_0);
List<ResourceRequest> app_1_requests_0 = new ArrayList<ResourceRequest>();
app_1_requests_0.add(TestUtils.createResourceRequest(host_2, 1 * GB, 1, true, priority, recordFactory));
app_1_requests_0.add(TestUtils.createResourceRequest(rack_2, 1 * GB, 1, true, priority, recordFactory));
app_1_requests_0.add(// one extra
TestUtils.createResourceRequest(// one extra
ResourceRequest.ANY, // one extra
1 * GB, // one extra
1, true, priority, recordFactory));
app_1.updateResourceRequests(app_1_requests_0);
// Start testing...
// When doing allocation, even if app_0 submit earlier than app_1, app_1 can
// still get allocated because app_0 is waiting for node-locality-delay
CSAssignment assignment = null;
// Check app_0's scheduling opportunities increased and app_1 get allocated
assignment = a.assignContainers(clusterResource, node_2, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyContainerAllocated(assignment, NodeType.NODE_LOCAL);
assertEquals(1, app_0.getSchedulingOpportunities(schedulerKey));
assertEquals(3, app_0.getOutstandingAsksCount(schedulerKey));
assertEquals(0, app_0.getLiveContainers().size());
assertEquals(1, app_1.getLiveContainers().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestReservations method testReservationsNoneAvailable.
@Test
public void testReservationsNoneAvailable() throws Exception {
// Test that we now unreserve and use a node that has space
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
setup(csConf);
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// Users
final String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_0 = spy(app_0);
Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_1 = spy(app_1);
Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "host_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
String host_2 = "host_2";
FiCaSchedulerNode node_2 = TestUtils.getMockNode(host_2, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1, node_2.getNodeID(), node_2);
when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
final int numNodes = 3;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priorityAM = TestUtils.createMockPriority(1);
Priority priorityMap = TestUtils.createMockPriority(5);
Priority priorityReduce = TestUtils.createMockPriority(10);
Priority priorityLast = TestUtils.createMockPriority(12);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 1, true, priorityAM, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 3 * GB, 2, true, priorityMap, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 5 * GB, 1, true, priorityReduce, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 8 * GB, 2, true, priorityLast, recordFactory)));
// Start testing...
// Only AM
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
assertEquals(22 * GB, a.getMetrics().getAvailableMB());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(5 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(5 * GB, a.getMetrics().getAllocatedMB());
assertEquals(19 * GB, a.getMetrics().getAvailableMB());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map to other node - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(16 * GB, a.getMetrics().getAvailableMB());
assertEquals(16 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// try to assign reducer (5G on node 0), but tell it's resource limits <
// used (8G) + required (5G). It will not reserved since it has to unreserve
// some resource. Even with continous reservation looking, we don't allow
// unreserve resource to reserve container.
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(Resources.createResource(10 * GB)), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(16 * GB, a.getMetrics().getAvailableMB());
// app_0's headroom = limit (10G) - used (8G) = 2G
assertEquals(2 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// try to assign reducer (5G on node 0), but tell it's resource limits <
// used (8G) + required (5G). It will not reserved since it has to unreserve
// some resource. Unfortunately, there's nothing to unreserve.
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_2, new ResourceLimits(Resources.createResource(10 * GB)), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(16 * GB, a.getMetrics().getAvailableMB());
// app_0's headroom = limit (10G) - used (8G) = 2G
assertEquals(2 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// let it assign 5G to node_2
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_2, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(13 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(11 * GB, a.getMetrics().getAvailableMB());
assertEquals(11 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
// reserve 8G node_0
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(21 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(8 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(3 * GB, a.getMetrics().getAvailableMB());
assertEquals(3 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
// try to assign (8G on node 2). No room to allocate,
// continued to try due to having reservation above,
// but hits queue limits so can't reserve anymore.
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_2, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(21 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(8 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(3 * GB, a.getMetrics().getAvailableMB());
assertEquals(3 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestReservations method testReservationNoContinueLook.
@Test
public void testReservationNoContinueLook() throws Exception {
// Test that with reservations-continue-look-all-nodes feature off
// we don't unreserve and show we could get stuck
queues = new HashMap<String, CSQueue>();
// test that the deadlock occurs when turned off
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
csConf.setBoolean(CapacitySchedulerConfiguration.RESERVE_CONT_LOOK_ALL_NODES, false);
setup(csConf);
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// Users
final String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_0 = spy(app_0);
Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_1 = spy(app_1);
Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "host_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
String host_2 = "host_2";
FiCaSchedulerNode node_2 = TestUtils.getMockNode(host_2, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1, node_2.getNodeID(), node_2);
when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
final int numNodes = 3;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priorityAM = TestUtils.createMockPriority(1);
Priority priorityMap = TestUtils.createMockPriority(5);
Priority priorityReduce = TestUtils.createMockPriority(10);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 1, true, priorityAM, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 5 * GB, 2, true, priorityReduce, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 3 * GB, 2, true, priorityMap, recordFactory)));
// Start testing...
// Only AM
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
assertEquals(22 * GB, a.getMetrics().getAvailableMB());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(5 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(5 * GB, a.getMetrics().getAllocatedMB());
assertEquals(19 * GB, a.getMetrics().getAvailableMB());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map to other node - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(16 * GB, a.getMetrics().getAvailableMB());
assertEquals(16 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(null, node_0.getReservedContainer());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// try to assign reducer (5G on node 0 and should reserve)
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(13 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(11 * GB, a.getMetrics().getAvailableMB());
assertEquals(11 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// assign reducer to node 2
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_2, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(18 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(6 * GB, a.getMetrics().getAvailableMB());
assertEquals(6 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(1, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// node_1 heartbeat and won't unreserve from node_0, potentially stuck
// if AM doesn't handle
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(18 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(6 * GB, a.getMetrics().getAvailableMB());
assertEquals(6 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(1, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
}
Aggregations