Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by Apache.
From the class TestLeafQueue, the method testSingleQueueWithOneUser:
@Test
public void testSingleQueueWithOneUser() throws Exception {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// Unset max-capacity (1.0f == 100%)
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
// Active Users Manager
AbstractUsersManager activeUserManager = a.getAbstractUsersManager();
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, activeUserManager, spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, activeUserManager, spyRMContext);
// same user
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "127.0.0.1";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0);
final int numNodes = 1;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priority = TestUtils.createMockPriority(1);
app_0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 3, true, priority, recordFactory)));
app_1.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 2, true, priority, recordFactory)));
// Start testing...
// Only 1 container
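// applyCSAssignment commits the proposed CSAssignment through the scheduler's commit path, so the queue usage and metrics asserted below observe the allocation.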
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(1 * GB, a.getUsedResources().getMemorySize());
assertEquals(1 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
assertEquals(0 * GB, a.getMetrics().getAvailableMB());
// Also a 2nd container -> the per-user limit floors at 1024 MB since (.1 * 8G) < minAlloc;
// a user may also get one container more than the user-limit
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Can't allocate 3rd due to user-limit
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(2 * GB, a.getUsedResources().getMemorySize());
assertEquals(2 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(2 * GB, a.getMetrics().getAllocatedMB());
// Bump up user-limit-factor, now allocate should work
a.setUserLimitFactor(10);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(3 * GB, a.getUsedResources().getMemorySize());
assertEquals(3 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(3 * GB, a.getMetrics().getAllocatedMB());
// One more should work, for app_1, due to user-limit-factor
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(4 * GB, a.getUsedResources().getMemorySize());
assertEquals(3 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(1 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(4 * GB, a.getMetrics().getAllocatedMB());
// Test max-capacity
// Now - no more allocs since we are at max-cap
a.setMaxCapacity(0.5f);
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(4 * GB, a.getUsedResources().getMemorySize());
assertEquals(3 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(1 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(4 * GB, a.getMetrics().getAllocatedMB());
// Release each container from app_0
for (RMContainer rmContainer : app_0.getLiveContainers()) {
a.completedContainer(clusterResource, app_0, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
}
assertEquals(1 * GB, a.getUsedResources().getMemorySize());
assertEquals(0 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(1 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(1 * GB, a.getMetrics().getAllocatedMB());
// Release each container from app_1
for (RMContainer rmContainer : app_1.getLiveContainers()) {
a.completedContainer(clusterResource, app_1, node_0, rmContainer, ContainerStatus.newInstance(rmContainer.getContainerId(), ContainerState.COMPLETE, "", ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true);
}
assertEquals(0 * GB, a.getUsedResources().getMemorySize());
assertEquals(0 * GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0 * GB, a.getMetrics().getReservedMB());
assertEquals(0 * GB, a.getMetrics().getAllocatedMB());
assertEquals((int) (a.getCapacity() * node_0.getTotalResource().getMemorySize()), a.getMetrics().getAvailableMB());
}
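A note on what this test exercises: with user-limit-factor at its default of 1, a single user in queue 'a' is capped near the queue's capacity, and since (.1 * 8 GB) is below the 1 GB minimum allocation, the effective per-user limit floors at one container (plus the one-extra-container allowance). Below is a minimal sketch of that arithmetic, assuming illustrative names; the real computation lives in UsersManager/LeafQueue and also weighs node labels, pending demand, and minimum-user-limit-percent.

public final class UserLimitSketch {
    // Hypothetical, simplified user-limit arithmetic; not the scheduler's exact code.
    static long approxUserLimitMb(long queueCapacityMb, long minAllocationMb,
                                  float userLimitFactor, int activeUsers) {
        // Per-user share of the queue, floored at the minimum allocation --
        // hence "1024 since (.1 * 8G) < minAlloc" in the comment above.
        long perUser = Math.max(queueCapacityMb / Math.max(activeUsers, 1), minAllocationMb);
        return (long) (perUser * userLimitFactor);
    }

    public static void main(String[] args) {
        // Factor 1: cap is 1024 MB; with the one-extra allowance the user holds
        // 2 GB before the 3rd ask is blocked, matching the assertions above.
        System.out.println(approxUserLimitMb(819, 1024, 1f, 1));  // 1024
        // Factor 10 lifts the cap, so allocation proceeds again.
        System.out.println(approxUserLimitMb(819, 1024, 10f, 1)); // 10240
    }
}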
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by Apache.
From the class TestLeafQueue, the method testAllocateContainerOnNodeWithoutOffSwitchSpecified:
@Test
public void testAllocateContainerOnNodeWithoutOffSwitchSpecified() throws Exception {
// Manipulate queue 'b' (held in the local variable 'a')
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(B));
// Users
final String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
// same user
a.submitApplicationAttempt(app_1, user_0);
// Setup some nodes
String host_0 = "127.0.0.1";
FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0.getNodeID(), node_0);
final int numNodes = 1;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
Priority priority = TestUtils.createMockPriority(1);
app_0.updateResourceRequests(Arrays.asList(TestUtils.createResourceRequest("127.0.0.1", 1 * GB, 3, true, priority, recordFactory), TestUtils.createResourceRequest(DEFAULT_RACK, 1 * GB, 3, true, priority, recordFactory)));
try {
applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
} catch (NullPointerException e) {
Assert.fail("NPE when allocating container on node but " + "forget to set off-switch request should be handled");
}
}
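For contrast, a well-formed request set includes the off-switch (ResourceRequest.ANY) level alongside the node- and rack-local requests; the test above deliberately omits it to prove the scheduler fails soft instead of throwing. A sketch using the same helpers and in-scope names as the test:

// A complete request set: node-local, rack-local, and the off-switch (ANY)
// level that the test above leaves out on purpose.
List<ResourceRequest> complete = Arrays.asList(
    TestUtils.createResourceRequest("127.0.0.1", 1 * GB, 3, true, priority, recordFactory),
    TestUtils.createResourceRequest(DEFAULT_RACK, 1 * GB, 3, true, priority, recordFactory),
    TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 3, true, priority, recordFactory));
app_0.updateResourceRequests(complete);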
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by Apache.
From the class TestLeafQueue, the method testLocalityConstraints:
@Test
public void testLocalityConstraints() throws Exception {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
// User
String user_0 = "user_0";
// Submit applications
final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_0, user_0);
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
a.submitApplicationAttempt(app_1, user_0);
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
// Setup some nodes and racks
String host_0_0 = "127.0.0.1";
String rack_0 = "rack_0";
String host_0_1 = "127.0.0.2";
FiCaSchedulerNode node_0_1 = TestUtils.getMockNode(host_0_1, rack_0, 0, 8 * GB);
String host_1_0 = "127.0.0.3";
String rack_1 = "rack_1";
FiCaSchedulerNode node_1_0 = TestUtils.getMockNode(host_1_0, rack_1, 0, 8 * GB);
String host_1_1 = "127.0.0.4";
FiCaSchedulerNode node_1_1 = TestUtils.getMockNode(host_1_1, rack_1, 0, 8 * GB);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0_1.getNodeID(), node_0_1, node_1_0.getNodeID(), node_1_0, node_1_1.getNodeID(), node_1_1);
final int numNodes = 4;
Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 1);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Setup resource-requests
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null > <----
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, false > <----
// ANY: < 1, 1GB, 1, false > <----
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 8G
// Blacklist: <host_0_0>
Priority priority = TestUtils.createMockPriority(1);
SchedulerRequestKey schedulerKey = toSchedulerKey(priority);
List<ResourceRequest> app_0_requests_0 = new ArrayList<ResourceRequest>();
app_0_requests_0.add(TestUtils.createResourceRequest(host_0_0, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(TestUtils.createResourceRequest(host_1_0, 1 * GB, 1, true, priority, recordFactory));
app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, false, priority, recordFactory));
// only one ANY request
app_0_requests_0.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 1, false, priority, recordFactory));
app_0.updateResourceRequests(app_0_requests_0);
app_0.updateBlacklist(Collections.singletonList(host_0_0), null);
app_0_requests_0.clear();
//
// Start testing...
//
// node_0_1
// Shouldn't allocate since RR(rack_0) = null && RR(ANY) = relax: false
CSAssignment assignment = a.assignContainers(clusterResource, node_0_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyNoContainerAllocated(assignment);
// should be 0
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null > <----
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, false > <----
// ANY: < 1, 1GB, 1, false > <----
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 8G
// Blacklist: <host_0_0>
// node_1_1
// Shouldn't allocate since RR(rack_1) = relax: false
assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyNoContainerAllocated(assignment);
// should be 0
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
// Allow rack-locality for rack_1, but blacklist node_1_1
app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, true, priority, recordFactory));
app_0.updateResourceRequests(app_0_requests_0);
app_0.updateBlacklist(Collections.singletonList(host_1_1), null);
app_0_requests_0.clear();
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null >
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, true >
// ANY: < 1, 1GB, 1, false >
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 8G
// Blacklist: < host_0_0 , host_1_1 > <----
// node_1_1
// Shouldn't allocate since node_1_1 is blacklisted
assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyNoContainerAllocated(assignment);
// should be 0
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
// Now, remove node_1_1 from blacklist, but add rack_1 to blacklist
app_0.updateResourceRequests(app_0_requests_0);
app_0.updateBlacklist(Collections.singletonList(rack_1), Collections.singletonList(host_1_1));
app_0_requests_0.clear();
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null >
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, true >
// ANY: < 1, 1GB, 1, false >
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 8G
// Blacklist: < host_0_0 , rack_1 > <----
// node_1_1
// Shouldn't allocate since rack_1 is blacklisted
assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyNoContainerAllocated(assignment);
// should be 0
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
// Now remove rack_1 from blacklist
app_0.updateResourceRequests(app_0_requests_0);
app_0.updateBlacklist(null, Collections.singletonList(rack_1));
app_0_requests_0.clear();
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null >
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, true >
// ANY: < 1, 1GB, 1, false >
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 8G
// Blacklist: < host_0_0 > <----
// rack_1 is no longer blacklisted and RR(rack_1) = relax: true, so the request is schedulable again; this pass still allocates nothing (the ask remains outstanding, as asserted below)
assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyNoContainerAllocated(assignment);
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
assertEquals(1, app_0.getOutstandingAsksCount(schedulerKey));
// Now sanity-check node_local
app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, false, priority, recordFactory));
// only one ANY request
app_0_requests_0.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 1, false, priority, recordFactory));
app_0.updateResourceRequests(app_0_requests_0);
app_0_requests_0.clear();
// resourceName: <priority, memory, #containers, relaxLocality>
// host_0_0: < 1, 1GB, 1, true >
// host_0_1: < null >
// rack_0: < null >
// host_1_0: < 1, 1GB, 1, true >
// host_1_1: < null >
// rack_1: < 1, 1GB, 1, false > <----
// ANY: < 1, 1GB, 1, false > <----
// Availability:
// host_0_0: 8G
// host_0_1: 8G
// host_1_0: 8G
// host_1_1: 7G
assignment = a.assignContainers(clusterResource, node_1_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
applyCSAssignment(clusterResource, assignment, a, nodes, apps);
verifyContainerAllocated(assignment, NodeType.NODE_LOCAL);
assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
assertEquals(0, app_0.getOutstandingAsksCount(schedulerKey));
}
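The relaxLocality flag driving these branches is set by the application master. As a client-side sketch (assuming the five-argument AMRMClient.ContainerRequest constructor), a hard-locality ask equivalent to the rack_1 requests above looks like this:

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;

public final class HardLocalityAsk {
    // Sketch: request 1 GB on a specific host/rack with relaxLocality=false,
    // so the scheduler may not fall back to arbitrary (off-switch) placement.
    public static ContainerRequest onRackOnly(String host, String rack) {
        Resource capability = Resource.newInstance(1024, 1); // 1 GB, 1 vcore
        Priority priority = Priority.newInstance(1);
        return new ContainerRequest(capability,
            new String[] { host }, new String[] { rack }, priority, false);
    }
}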
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by Apache.
From the class TestCapacityScheduler, the method testSchedulingOnRemovedNode:
@Test
public void testSchedulingOnRemovedNode() throws Exception {
Configuration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false);
MockRM rm = new MockRM(conf);
rm.start();
RMApp app = rm.submitApp(100);
rm.drainEvents();
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10240, 10);
MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
// register nm2 now; it is removed later, so the AM (running on nm1) stays alive
MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10240, 10);
am.allocate(ResourceRequest.ANY, 2048, 1, null);
CapacityScheduler scheduler = (CapacityScheduler) rm.getRMContext().getScheduler();
FiCaSchedulerNode node = (FiCaSchedulerNode) scheduler.getNodeTracker().getNode(nm2.getNodeId());
scheduler.handle(new NodeRemovedSchedulerEvent(rm.getRMContext().getRMNodes().get(nm2.getNodeId())));
// the scheduler node has been removed; attempting to allocate a container must not throw
scheduler.allocateContainersToNode(new SimplePlacementSet<>(node), true);
AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED, false);
scheduler.handle(appRemovedEvent1);
rm.stop();
}
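The point of this test is the race between node removal and scheduling: the FiCaSchedulerNode reference captured before the NodeRemovedSchedulerEvent is stale, and allocateContainersToNode must tolerate it. A defensive sketch of the pattern being verified (illustrative only, not the scheduler's exact code):

// Hypothetical guard: re-resolve the node from the tracker instead of
// trusting a reference captured before the removal event.
FiCaSchedulerNode current = scheduler.getNodeTracker().getNode(nm2.getNodeId());
if (current == null) {
    return; // node already removed; skip this scheduling pass
}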
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by Apache.
From the class LeafQueue, the method allocateFromReservedContainer:
private CSAssignment allocateFromReservedContainer(Resource clusterResource, PlacementSet<FiCaSchedulerNode> ps, ResourceLimits currentResourceLimits, SchedulingMode schedulingMode) {
FiCaSchedulerNode node = PlacementSetUtils.getSingleNode(ps);
if (null == node) {
return null;
}
RMContainer reservedContainer = node.getReservedContainer();
if (reservedContainer != null) {
FiCaSchedulerApp application = getApplication(reservedContainer.getApplicationAttemptId());
if (null != application) {
ActivitiesLogger.APP.startAppAllocationRecording(activitiesManager, node.getNodeID(), SystemClock.getInstance().getTime(), application);
CSAssignment assignment = application.assignContainers(clusterResource, ps, currentResourceLimits, schedulingMode, reservedContainer);
return assignment;
}
}
return null;
}
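A plausible caller-side sketch (hypothetical; the actual call site is LeafQueue's assignContainers): the reserved-container path is tried first, and only a null result lets scheduling fall through to fresh allocations.

CSAssignment reserved = allocateFromReservedContainer(clusterResource, ps, currentResourceLimits, schedulingMode);
if (reserved != null) {
    // Honor the existing reservation on this node before considering new asks.
    return reserved;
}
// ... otherwise continue with the regular leaf-queue assignment path ...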