Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class TestReservations, method testReservationNoContinueLook.
@Test
public void testReservationNoContinueLook() throws Exception {
// Test that with reservations-continue-look-all-nodes feature off
// we don't unreserve and show we could get stuck
queues = new HashMap<String, CSQueue>();
// test that the deadlock occurs when turned off
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
csConf.setBoolean(CapacitySchedulerConfiguration.RESERVE_CONT_LOOK_ALL_NODES, false);
setup(csConf);
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// Users
final String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_0 = spy(app_0);
Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_1 = spy(app_1);
Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "host_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
String host_2 = "host_2";
FiCaSchedulerNode node_2 = TestUtils.getMockNode(host_2, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1, node_2.getNodeID(), node_2);
when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
final int numNodes = 3;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priorityAM = TestUtils.createMockPriority(1);
Priority priorityMap = TestUtils.createMockPriority(5);
Priority priorityReduce = TestUtils.createMockPriority(10);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 1, true, priorityAM, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 5 * GB, 2, true, priorityReduce, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 3 * GB, 2, true, priorityMap, recordFactory)));
// Start testing...
// Only AM
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
assertEquals(22 * GB, a.getMetrics().getAvailableMB());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(5 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(5 * GB, a.getMetrics().getAllocatedMB());
assertEquals(19 * GB, a.getMetrics().getAvailableMB());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
// Only 1 map to other node - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(16 * GB, a.getMetrics().getAvailableMB());
assertEquals(16 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(null, node_0.getReservedContainer());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// try to assign reducer (5G on node 0 and should reserve)
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(13 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(11 * GB, a.getMetrics().getAvailableMB());
assertEquals(11 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// assign reducer to node 2
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_2, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(18 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(6 * GB, a.getMetrics().getAvailableMB());
assertEquals(6 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(1, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// node_1 heartbeat and won't unreserve from node_0, potentially stuck
// if AM doesn't handle
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(18 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(6 * GB, a.getMetrics().getAvailableMB());
assertEquals(6 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(5 * GB, node_2.getAllocatedResource().getMemorySize());
assertEquals(1, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
}
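Each scheduling step in this test (and in the ones that follow) repeats the same assign-then-commit idiom. A hedged refactoring sketch, using only calls that already appear above; the helper name heartbeat is an assumption, not part of the original test:

// Assumed convenience wrapper around the assign/commit sequence repeated above.
// queue.assignContainers and TestUtils.applyResourceCommitRequest are taken
// verbatim from the test; only the helper itself is invented.
private void heartbeat(LeafQueue queue, FiCaSchedulerNode node,
    Resource clusterResource,
    Map<NodeId, FiCaSchedulerNode> nodes,
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps) throws Exception {
  TestUtils.applyResourceCommitRequest(clusterResource,
      queue.assignContainers(clusterResource, node,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY),
      nodes, apps);
}

With such a helper, each step above would read as heartbeat(a, node_0, clusterResource, nodes, apps) followed by its assertions.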
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class TestReservations, method testGetAppToUnreserve.
@Test
public void testGetAppToUnreserve() throws Exception {
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
setup(csConf);
final String user_0 = "user_0";
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
String host_0 = "host_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
Resource clusterResource = Resources.createResource(2 * 8 * GB);
// Setup resource-requests
Priority p = TestUtils.createMockPriority(5);
SchedulerRequestKey priorityMap = toSchedulerKey(p);
Resource capability = Resources.createResource(2 * GB, 0);
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
RMContext rmContext = mock(RMContext.class);
ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);
DrainDispatcher drainDispatcher = new DrainDispatcher();
when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(app_0.getApplicationId(), 1);
ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
Container container = TestUtils.getMockContainer(containerId, node_1.getNodeID(), Resources.createResource(2 * GB), priorityMap.getPriority());
RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), appAttemptId, node_1.getNodeID(), "user", rmContext);
Container container_1 = TestUtils.getMockContainer(containerId, node_0.getNodeID(), Resources.createResource(1 * GB), priorityMap.getPriority());
RMContainer rmContainer_1 = new RMContainerImpl(container_1, SchedulerRequestKey.extractFrom(container_1), appAttemptId, node_0.getNodeID(), "user", rmContext);
// no reserved containers
NodeId unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
assertEquals(null, unreserveId);
// no reserved containers - reserve then unreserve
app_0.reserve(node_0, priorityMap, rmContainer_1, container_1);
app_0.unreserve(priorityMap, node_0, rmContainer_1);
unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
assertEquals(null, unreserveId);
// no container large enough is reserved
app_0.reserve(node_0, priorityMap, rmContainer_1, container_1);
unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
assertEquals(null, unreserveId);
// reserve one that is now large enough
app_0.reserve(node_1, priorityMap, rmContainer, container);
unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource);
assertEquals(node_1.getNodeID(), unreserveId);
}
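The three phases above (nothing reserved, a 1 GB reservation smaller than the 2 GB ask, a 2 GB reservation that covers it) pin down the contract of getNodeIdToUnreserve: it returns a node only when releasing that node's reservation would actually free enough space. A minimal hedged sketch of a caller-side guard built on that contract; the helper name is an assumption, while getNodeIdToUnreserve and cs.getResourceCalculator() are the real calls exercised in the test:

// Assumed helper: decide whether unreserving could satisfy a pending ask.
// A null result means no reserved container is at least as large as the
// requested capability, so unreserving cannot help this request.
private boolean wouldUnreserveHelp(FiCaSchedulerApp app, SchedulerRequestKey key,
    Resource capability, Resource clusterResource) {
  NodeId nodeId = app.getNodeIdToUnreserve(key, capability,
      cs.getResourceCalculator(), clusterResource);
  return nodeId != null;
}

The null branch corresponds directly to the test's "no container large enough is reserved" case.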
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class TestReservations, method testAssignContainersNeedToUnreserve.
@Test
@SuppressWarnings("unchecked")
public void testAssignContainersNeedToUnreserve() throws Exception {
// Test that we now unreserve and use a node that has space
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
setup(csConf);
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// Users
final String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_0 = spy(app_0);
Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
app_1 = spy(app_1);
Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class), any(String.class));
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "host_0";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
String host_1 = "host_1";
FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0, node_1.getNodeID(), node_1);
cs.getNodeTracker().addNode(node_0);
cs.getNodeTracker().addNode(node_1);
when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
final int numNodes = 2;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priorityAM = TestUtils.createMockPriority(1);
Priority priorityMap = TestUtils.createMockPriority(5);
Priority priorityReduce = TestUtils.createMockPriority(10);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 1, true, priorityAM, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 5 * GB, 2, true, priorityReduce, recordFactory)));
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 3 * GB, 2, true, priorityMap, recordFactory)));
// Start testing...
// Only AM
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
assertEquals(14 * GB, a.getMetrics().getAvailableMB());
assertEquals(2 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
// Only 1 map - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(5 * GB, a.getUsedResources().getMemorySize());
assertEquals(5 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(5 * GB, a.getMetrics().getAllocatedMB());
assertEquals(11 * GB, a.getMetrics().getAvailableMB());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(0 * GB, node_1.getAllocatedResource().getMemorySize());
// Only 1 map to other node - simulating reduce
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(8 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(8 * GB, a.getMetrics().getAvailableMB());
assertEquals(8 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(null, node_0.getReservedContainer());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// try to assign reducer (5G on node 0 and should reserve)
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(13 * GB, a.getUsedResources().getMemorySize());
assertEquals(8 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(5 * GB, a.getMetrics().getReservedMB());
assertEquals(8 * GB, a.getMetrics().getAllocatedMB());
assertEquals(3 * GB, a.getMetrics().getAvailableMB());
assertEquals(3 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(5 * GB, node_0.getReservedContainer().getReservedResource().getMemorySize());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(3 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(2, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
// could allocate but told need to unreserve first
TestUtils.applyResourceCommitRequest(clusterResource, a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
assertEquals(13 * GB, a.getUsedResources().getMemorySize());
assertEquals(13 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(13 * GB, a.getMetrics().getAllocatedMB());
assertEquals(3 * GB, a.getMetrics().getAvailableMB());
assertEquals(3 * GB, app_0.getHeadroom().getMemorySize());
assertEquals(null, node_0.getReservedContainer());
assertEquals(5 * GB, node_0.getAllocatedResource().getMemorySize());
assertEquals(8 * GB, node_1.getAllocatedResource().getMemorySize());
assertEquals(1, app_0.getOutstandingAsksCount(toSchedulerKey(priorityReduce)));
}
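Unlike testReservationNoContinueLook, which sets RESERVE_CONT_LOOK_ALL_NODES to false, this test runs with the stock configuration and shows node_1's heartbeat unreserving the 5 GB reservation on node_0. For contrast, a minimal sketch of making that setting explicit, built from the same calls the tests use; treating true as the default here is an assumption inferred from the test's behavior:

// Explicitly enable continue-look-all-nodes so a heartbeat from one node may
// trigger an unreserve on another. The constant and setup(...) appear in the
// tests above; setting 'true' explicitly is shown for illustration only.
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
csConf.setBoolean(CapacitySchedulerConfiguration.RESERVE_CONT_LOOK_ALL_NODES, true);
setup(csConf);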
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class TestParentQueue, method testOffSwitchScheduling.
@Test
public void testOffSwitchScheduling() throws Exception {
// Setup queue configs
setupSingleLevelQueues(csConf);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, TestUtils.spyHook);
// Setup some nodes
final int memoryPerNode = 10;
final int coresPerNode = 16;
final int numNodes = 2;
FiCaSchedulerNode node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, memoryPerNode * GB);
FiCaSchedulerNode node_1 = TestUtils.getMockNode("host_1", DEFAULT_RACK, 0, memoryPerNode * GB);
final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode * GB), numNodes * coresPerNode);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Start testing
LeafQueue a = (LeafQueue) queues.get(A);
LeafQueue b = (LeafQueue) queues.get(B);
a.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
b.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
queues.get(CapacitySchedulerConfiguration.ROOT).getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
// Simulate B returning a container on node_0
stubQueueAllocation(a, clusterResource, node_0, 0 * GB, NodeType.OFF_SWITCH);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(a, 0 * GB, clusterResource);
verifyQueueMetrics(b, 1 * GB, clusterResource);
// Now, A should get the scheduling opportunity since A=0G/6G, B=1G/14G
// also, B gets a scheduling opportunity since A allocates RACK_LOCAL
stubQueueAllocation(a, clusterResource, node_1, 2 * GB, NodeType.RACK_LOCAL);
stubQueueAllocation(b, clusterResource, node_1, 1 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
InOrder allocationOrder = inOrder(a);
allocationOrder.verify(a).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
root.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
allocationOrder = inOrder(b);
allocationOrder.verify(b).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
// Now, B should get the scheduling opportunity
// since A has 2/6G while B has 2/14G,
// However, since B returns off-switch, A won't get an opportunity
stubQueueAllocation(a, clusterResource, node_0, 1 * GB, NodeType.NODE_LOCAL);
stubQueueAllocation(b, clusterResource, node_0, 2 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
allocationOrder = inOrder(b, a);
allocationOrder.verify(b).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
allocationOrder.verify(a).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 4 * GB, clusterResource);
}
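The ordering assertions in this test come from Mockito's InOrder API; a plain verify() would pass no matter which queue was offered the node first. A self-contained illustration of the pattern, with java.util.List standing in for the spied queues (class name and strings are illustrative only):

// Minimal standalone Mockito InOrder demo; List<String> stands in for CSQueue.
import static org.mockito.Mockito.*;
import java.util.List;
import org.mockito.InOrder;

public class InOrderDemo {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    List<String> b = mock(List.class);
    List<String> a = mock(List.class);
    b.add("off-switch queue scheduled first");
    a.add("node-local queue scheduled second");
    InOrder order = inOrder(b, a);
    order.verify(b).add(anyString()); // fails if a's call had come first
    order.verify(a).add(anyString());
  }
}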
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class TestParentQueue, method testOffSwitchSchedulingMultiLevelQueues.
@Test
public void testOffSwitchSchedulingMultiLevelQueues() throws Exception {
// Setup queue configs
setupMultiLevelQueues(csConf);
//B3
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, TestUtils.spyHook);
// Setup some nodes
final int memoryPerNode = 10;
final int coresPerNode = 10;
final int numNodes = 2;
FiCaSchedulerNode node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, memoryPerNode * GB);
FiCaSchedulerNode node_1 = TestUtils.getMockNode("host_1", DEFAULT_RACK, 0, memoryPerNode * GB);
final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode * GB), numNodes * coresPerNode);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Start testing
LeafQueue b3 = (LeafQueue) queues.get(B3);
LeafQueue b2 = (LeafQueue) queues.get(B2);
b2.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
b3.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
queues.get(CapacitySchedulerConfiguration.ROOT).getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
CSQueue b = queues.get(B);
b.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
// Simulate B3 returning a container on node_0
stubQueueAllocation(b2, clusterResource, node_0, 0 * GB, NodeType.OFF_SWITCH);
stubQueueAllocation(b3, clusterResource, node_0, 1 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(b2, 0 * GB, clusterResource);
verifyQueueMetrics(b3, 1 * GB, clusterResource);
// Now, B2 should get the scheduling opportunity since B2=0G/2G, B3=1G/7G
// also, B3 gets a scheduling opportunity since B2 allocates RACK_LOCAL
stubQueueAllocation(b2, clusterResource, node_1, 1 * GB, NodeType.RACK_LOCAL);
stubQueueAllocation(b3, clusterResource, node_1, 1 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
root.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
InOrder allocationOrder = inOrder(b2, b3);
allocationOrder.verify(b2).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
allocationOrder.verify(b3).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
verifyQueueMetrics(b2, 1 * GB, clusterResource);
verifyQueueMetrics(b3, 2 * GB, clusterResource);
// Now, B3 should get the scheduling opportunity
// since B2 has 1/2G while B3 has 2/7G,
// However, since B3 returns off-switch, B2 won't get an opportunity
stubQueueAllocation(b2, clusterResource, node_0, 1 * GB, NodeType.NODE_LOCAL);
stubQueueAllocation(b3, clusterResource, node_0, 1 * GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
allocationOrder = inOrder(b3, b2);
allocationOrder.verify(b3).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
allocationOrder.verify(b2).assignContainers(eq(clusterResource), any(PlacementSet.class), anyResourceLimits(), any(SchedulingMode.class));
verifyQueueMetrics(b2, 1 * GB, clusterResource);
verifyQueueMetrics(b3, 3 * GB, clusterResource);
}
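setupMultiLevelQueues is defined elsewhere in TestParentQueue and is not part of this excerpt. A hedged sketch of the kind of hierarchy it would have to build for B2 and B3 to exist under B; setQueues and setCapacity are standard CapacitySchedulerConfiguration helpers, but every queue name beyond B/B2/B3 and every percentage below is an assumption:

// Hedged sketch of a multi-level layout (B under root, B2/B3 under B).
// The capacity split is invented for illustration and must sum to 100 per level.
private void setupMultiLevelQueues(CapacitySchedulerConfiguration conf) {
  conf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] {"a", "b"});
  conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a", 30);
  conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".b", 70);
  conf.setQueues(CapacitySchedulerConfiguration.ROOT + ".b",
      new String[] {"b1", "b2", "b3"});
  conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".b.b1", 10);
  conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".b.b2", 20);
  conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".b.b3", 70);
}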