use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class CapacityScheduler method allocateOrReserveNewContainers.
private CSAssignment allocateOrReserveNewContainers(PlacementSet<FiCaSchedulerNode> ps, boolean withNodeHeartbeat) {
CSAssignment assignment = getRootQueue().assignContainers(getClusterResource(), ps, new ResourceLimits(labelManager.getResourceByLabel(ps.getPartition(), getClusterResource())), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
submitResourceCommitRequest(getClusterResource(), assignment);
if (Resources.greaterThan(calculator, getClusterResource(), assignment.getResource(), Resources.none())) {
if (withNodeHeartbeat) {
updateSchedulerHealth(lastNodeUpdateTime, PlacementSetUtils.getSingleNode(ps).getNodeID(), assignment);
}
return assignment;
}
// Only do non-exclusive allocation when node has node-labels.
if (StringUtils.equals(ps.getPartition(), RMNodeLabelsManager.NO_LABEL)) {
return null;
}
// Only do non-exclusive allocation when the node-label supports that
try {
if (rmContext.getNodeLabelManager().isExclusiveNodeLabel(ps.getPartition())) {
return null;
}
} catch (IOException e) {
LOG.warn("Exception when trying to get exclusivity of node label=" + ps.getPartition(), e);
return null;
}
// Try to use NON_EXCLUSIVE
assignment = getRootQueue().assignContainers(getClusterResource(), ps, // resources, should consider labeled resources as well.
new ResourceLimits(labelManager.getResourceByLabel(RMNodeLabelsManager.NO_LABEL, getClusterResource())), SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
assignment.setSchedulingMode(SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
submitResourceCommitRequest(getClusterResource(), assignment);
return assignment;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class CapacitySchedulerQueueManager method reinitializeQueues.
@Override
public void reinitializeQueues(CapacitySchedulerConfiguration newConf) throws IOException {
// Parse new queues
Map<String, CSQueue> newQueues = new HashMap<>();
CSQueue newRoot = parseQueue(this.csContext, newConf, null, CapacitySchedulerConfiguration.ROOT, newQueues, queues, NOOP);
// Ensure queue hiearchy in the new XML file is proper.
validateQueueHierarchy(queues, newQueues);
// Add new queues and delete OldQeueus only after validation.
updateQueues(queues, newQueues);
// Re-configure queues
root.reinitialize(newRoot, this.csContext.getClusterResource());
setQueueAcls(authorizer, appPriorityACLManager, queues);
// Re-calculate headroom for active applications
Resource clusterResource = this.csContext.getClusterResource();
root.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
labelManager.reinitializeQueueLabels(getQueueToLabels());
this.queueStateManager.initialize(this);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestApplicationLimits method testHeadroom.
@Test
public void testHeadroom() throws Exception {
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
csConf.setUserLimit(CapacitySchedulerConfiguration.ROOT + "." + A, 25);
setupQueueConfiguration(csConf);
YarnConfiguration conf = new YarnConfiguration();
CapacitySchedulerContext csContext = mock(CapacitySchedulerContext.class);
when(csContext.getConfiguration()).thenReturn(csConf);
when(csContext.getConf()).thenReturn(conf);
when(csContext.getMinimumResourceCapability()).thenReturn(Resources.createResource(GB));
when(csContext.getMaximumResourceCapability()).thenReturn(Resources.createResource(16 * GB));
when(csContext.getResourceCalculator()).thenReturn(resourceCalculator);
when(csContext.getRMContext()).thenReturn(rmContext);
// Say cluster has 100 nodes of 16G each
Resource clusterResource = Resources.createResource(100 * 16 * GB);
when(csContext.getClusterResource()).thenReturn(clusterResource);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue rootQueue = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, "root", queues, queues, TestUtils.spyHook);
ResourceUsage queueCapacities = rootQueue.getQueueResourceUsage();
when(csContext.getClusterResourceUsage()).thenReturn(queueCapacities);
// Manipulate queue 'a'
LeafQueue queue = TestLeafQueue.stubLeafQueue((LeafQueue) queues.get(A));
queue.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
String host_0 = "host_0";
String rack_0 = "rack_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, rack_0, 0, 16 * GB);
final String user_0 = "user_0";
final String user_1 = "user_1";
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
RMContext rmContext = TestUtils.getMockRMContext();
RMContext spyRMContext = spy(rmContext);
ConcurrentMap<ApplicationId, RMApp> spyApps = spy(new ConcurrentHashMap<ApplicationId, RMApp>());
RMApp rmApp = mock(RMApp.class);
ResourceRequest amResourceRequest = mock(ResourceRequest.class);
Resource amResource = Resources.createResource(0, 0);
when(amResourceRequest.getCapability()).thenReturn(amResource);
when(rmApp.getAMResourceRequest()).thenReturn(amResourceRequest);
Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId) Matchers.any());
when(spyRMContext.getRMApps()).thenReturn(spyApps);
RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class);
when(rmApp.getRMAppAttempt((ApplicationAttemptId) Matchers.any())).thenReturn(rmAppAttempt);
when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt);
Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId) Matchers.any());
Mockito.doReturn(true).when(spyApps).containsKey((ApplicationId) Matchers.any());
Priority priority_1 = TestUtils.createMockPriority(1);
// Submit first application with some resource-requests from user_0,
// and check headroom
final ApplicationAttemptId appAttemptId_0_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0_0 = new FiCaSchedulerApp(appAttemptId_0_0, user_0, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_0_0, user_0);
List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
app_0_0_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_0_0.updateResourceRequests(app_0_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
Resource expectedHeadroom = Resources.createResource(5 * 16 * GB, 1);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
// Submit second application from user_0, check headroom
final ApplicationAttemptId appAttemptId_0_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_0_1 = new FiCaSchedulerApp(appAttemptId_0_1, user_0, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_0_1, user_0);
List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
app_0_1_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_0_1.updateResourceRequests(app_0_1_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
// no change
assertEquals(expectedHeadroom, app_0_1.getHeadroom());
// Submit first application from user_1, check for new headroom
final ApplicationAttemptId appAttemptId_1_0 = TestUtils.getMockApplicationAttemptId(2, 0);
FiCaSchedulerApp app_1_0 = new FiCaSchedulerApp(appAttemptId_1_0, user_1, queue, queue.getAbstractUsersManager(), spyRMContext);
queue.submitApplicationAttempt(app_1_0, user_1);
List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
app_1_0_requests.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority_1, recordFactory));
app_1_0.updateResourceRequests(app_1_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
// changes
expectedHeadroom = Resources.createResource(10 * 16 * GB / 2, 1);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
assertEquals(expectedHeadroom, app_0_1.getHeadroom());
assertEquals(expectedHeadroom, app_1_0.getHeadroom());
// Now reduce cluster size and check for the smaller headroom
clusterResource = Resources.createResource(90 * 16 * GB);
// Any change is cluster resource needs to enforce user-limit recomputation.
// In existing code, LeafQueue#updateClusterResource handled this. However
// here that method was not used.
queue.getUsersManager().userLimitNeedsRecompute();
queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), // Schedule to compute
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
// changes
expectedHeadroom = Resources.createResource(9 * 16 * GB / 2, 1);
assertEquals(expectedHeadroom, app_0_0.getHeadroom());
assertEquals(expectedHeadroom, app_0_1.getHeadroom());
assertEquals(expectedHeadroom, app_1_0.getHeadroom());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestApplicationLimits method testLimitsComputation.
@Test
public void testLimitsComputation() throws Exception {
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
setupQueueConfiguration(csConf);
YarnConfiguration conf = new YarnConfiguration();
CapacitySchedulerContext csContext = mock(CapacitySchedulerContext.class);
when(csContext.getConfiguration()).thenReturn(csConf);
when(csContext.getConf()).thenReturn(conf);
when(csContext.getMinimumResourceCapability()).thenReturn(Resources.createResource(GB, 1));
when(csContext.getMaximumResourceCapability()).thenReturn(Resources.createResource(16 * GB, 16));
when(csContext.getResourceCalculator()).thenReturn(resourceCalculator);
when(csContext.getRMContext()).thenReturn(rmContext);
when(csContext.getPreemptionManager()).thenReturn(new PreemptionManager());
// Say cluster has 100 nodes of 16G each
Resource clusterResource = Resources.createResource(100 * 16 * GB, 100 * 16);
when(csContext.getClusterResource()).thenReturn(clusterResource);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, "root", queues, queues, TestUtils.spyHook);
LeafQueue queue = (LeafQueue) queues.get(A);
LOG.info("Queue 'A' -" + " aMResourceLimit=" + queue.getAMResourceLimit() + " UserAMResourceLimit=" + queue.getUserAMResourceLimit());
Resource amResourceLimit = Resource.newInstance(160 * GB, 1);
assertEquals(queue.calculateAndGetAMResourceLimit(), amResourceLimit);
assertEquals(queue.getUserAMResourceLimit(), Resource.newInstance(80 * GB, 1));
// Assert in metrics
assertEquals(queue.getMetrics().getAMResourceLimitMB(), amResourceLimit.getMemorySize());
assertEquals(queue.getMetrics().getAMResourceLimitVCores(), amResourceLimit.getVirtualCores());
assertEquals((int) (clusterResource.getMemorySize() * queue.getAbsoluteCapacity()), queue.getMetrics().getAvailableMB());
// Add some nodes to the cluster & test new limits
clusterResource = Resources.createResource(120 * 16 * GB);
root.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
assertEquals(queue.calculateAndGetAMResourceLimit(), Resource.newInstance(192 * GB, 1));
assertEquals(queue.getUserAMResourceLimit(), Resource.newInstance(96 * GB, 1));
assertEquals((int) (clusterResource.getMemorySize() * queue.getAbsoluteCapacity()), queue.getMetrics().getAvailableMB());
// should return -1 if per queue setting not set
assertEquals((int) CapacitySchedulerConfiguration.UNDEFINED, csConf.getMaximumApplicationsPerQueue(queue.getQueuePath()));
int expectedMaxApps = (int) (CapacitySchedulerConfiguration.DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS * queue.getAbsoluteCapacity());
assertEquals(expectedMaxApps, queue.getMaxApplications());
int expectedMaxAppsPerUser = Math.min(expectedMaxApps, (int) (expectedMaxApps * (queue.getUserLimit() / 100.0f) * queue.getUserLimitFactor()));
assertEquals(expectedMaxAppsPerUser, queue.getMaxApplicationsPerUser());
// should default to global setting if per queue setting not set
assertEquals((long) CapacitySchedulerConfiguration.DEFAULT_MAXIMUM_APPLICATIONMASTERS_RESOURCE_PERCENT, (long) csConf.getMaximumApplicationMasterResourcePerQueuePercent(queue.getQueuePath()));
// Change the per-queue max AM resources percentage.
csConf.setFloat(PREFIX + queue.getQueuePath() + ".maximum-am-resource-percent", 0.5f);
// Re-create queues to get new configs.
queues = new HashMap<String, CSQueue>();
root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, "root", queues, queues, TestUtils.spyHook);
clusterResource = Resources.createResource(100 * 16 * GB);
queue = (LeafQueue) queues.get(A);
assertEquals((long) 0.5, (long) csConf.getMaximumApplicationMasterResourcePerQueuePercent(queue.getQueuePath()));
assertEquals(queue.calculateAndGetAMResourceLimit(), Resource.newInstance(800 * GB, 1));
assertEquals(queue.getUserAMResourceLimit(), Resource.newInstance(400 * GB, 1));
// Change the per-queue max applications.
csConf.setInt(PREFIX + queue.getQueuePath() + ".maximum-applications", 9999);
// Re-create queues to get new configs.
queues = new HashMap<String, CSQueue>();
root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, "root", queues, queues, TestUtils.spyHook);
queue = (LeafQueue) queues.get(A);
assertEquals(9999, (int) csConf.getMaximumApplicationsPerQueue(queue.getQueuePath()));
assertEquals(9999, queue.getMaxApplications());
expectedMaxAppsPerUser = Math.min(9999, (int) (9999 * (queue.getUserLimit() / 100.0f) * queue.getUserLimitFactor()));
assertEquals(expectedMaxAppsPerUser, queue.getMaxApplicationsPerUser());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.
the class TestLeafQueue method testReservation.
@Test
public void testReservation() throws Exception {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
//unset maxCapacity
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
final String user_1 = "user_1";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_1, user_1);
// Setup some nodes
String host_0 = "127.0.0.1";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0);
final int numNodes = 2;
Resource clusterResource = Resources.createResource(numNodes * (4 * GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priority = TestUtils.createMockPriority(1);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority, recordFactory)));
app_1.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 4 * GB, 1, true, priority, recordFactory)));
// Start testing...
// Only 1 container
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(1 * GB, a.getUsedResources().getMemorySize());
assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
assertEquals(0 * GB, a.getMetrics().getAvailableMB());
// Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also
// you can get one container more than user-limit
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Now, reservation should kick in for app_1
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(6 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(4 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Now free 1 container from app_0 i.e. 1G
RMContainer rmContainer = app_0.getLiveContainers().iterator().next();
a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(1 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(4 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
// Now finish another container from app_0 and fulfill the reservation
rmContainer = app_0.getLiveContainers().iterator().next();
a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(4 * GB, a.getUsedResources().getMemorySize());
assertEquals(0 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(4 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentReservation().getMemorySize());
assertEquals(4 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(4 * GB, a.getMetrics().getAllocatedMB());
}
Aggregations