use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestChildQueueOrder method testSortedQueues.
@Test
@SuppressWarnings("unchecked")
public void testSortedQueues() throws Exception {
// Setup queue configs
setupSortedQueues(csConf);
Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
CSQueue root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, TestUtils.spyHook);
// Setup some nodes
final int memoryPerNode = 10;
final int coresPerNode = 16;
final int numNodes = 1;
FiCaSchedulerNode node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, memoryPerNode * GB);
doNothing().when(node_0).releaseContainer(any(ContainerId.class), anyBoolean());
final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode * GB), numNodes * coresPerNode);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
// Start testing
CSQueue a = queues.get(A);
CSQueue b = queues.get(B);
CSQueue c = queues.get(C);
CSQueue d = queues.get(D);
// Make a/b/c/d has >0 pending resource, so that allocation will continue.
queues.get(CapacitySchedulerConfiguration.ROOT).getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
a.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
b.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
c.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
d.getQueueResourceUsage().incPending(Resources.createResource(1 * GB));
final String user_0 = "user_0";
// Stub an App and its containerCompleted
FiCaSchedulerApp app_0 = getMockApplication(0, user_0);
doReturn(true).when(app_0).containerCompleted(any(RMContainer.class), any(ContainerStatus.class), any(RMContainerEventType.class), any(String.class));
Priority priority = TestUtils.createMockPriority(1);
ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);
DrainDispatcher drainDispatcher = new DrainDispatcher();
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
RMContext rmContext = mock(RMContext.class);
when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(app_0.getApplicationId(), 1);
ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
Container container = TestUtils.getMockContainer(containerId, node_0.getNodeID(), Resources.createResource(1 * GB), priority);
RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), appAttemptId, node_0.getNodeID(), "user", rmContext);
// Assign {1,2,3,4} 1GB containers respectively to queues
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
for (int i = 0; i < 2; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
for (int i = 0; i < 3; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 1 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
for (int i = 0; i < 4; i++) {
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 1 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
verifyQueueMetrics(a, 1 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 4 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 3 x 1GB containers from D
for (int i = 0; i < 3; i++) {
d.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
}
verifyQueueMetrics(a, 1 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 1 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 2 x 1GB Containers to A
for (int i = 0; i < 2; i++) {
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
}
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 1GB Container from A
a.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 2 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 1GB container to B
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 3 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
//Release 1GB container resources from B
b.completedContainer(clusterResource, app_0, node_0, rmContainer, null, RMContainerEventType.KILL, null, true);
verifyQueueMetrics(a, 2 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
//reset manually resources on node
node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, (memoryPerNode - 2 - 2 - 3 - 1) * GB);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Assign 1GB container to A
stubQueueAllocation(a, clusterResource, node_0, 1 * GB);
stubQueueAllocation(b, clusterResource, node_0, 0 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 0 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
verifyQueueMetrics(d, 1 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
// Now do the real test, where B and D request a 1GB container
// D should should get the next container if the order is correct
stubQueueAllocation(a, clusterResource, node_0, 0 * GB);
stubQueueAllocation(b, clusterResource, node_0, 1 * GB);
stubQueueAllocation(c, clusterResource, node_0, 0 * GB);
stubQueueAllocation(d, clusterResource, node_0, 1 * GB);
root.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
InOrder allocationOrder = inOrder(d, b);
allocationOrder.verify(d).assignContainers(eq(clusterResource), any(PlacementSet.class), any(ResourceLimits.class), any(SchedulingMode.class));
allocationOrder.verify(b).assignContainers(eq(clusterResource), any(PlacementSet.class), any(ResourceLimits.class), any(SchedulingMode.class));
verifyQueueMetrics(a, 3 * GB, clusterResource);
verifyQueueMetrics(b, 2 * GB, clusterResource);
verifyQueueMetrics(c, 3 * GB, clusterResource);
//D got the container
verifyQueueMetrics(d, 2 * GB, clusterResource);
LOG.info("status child-queues: " + ((ParentQueue) root).getChildQueuesToPrint());
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestNodeBlacklistingOnAMFailures method testNodeBlacklistingOnAMFailure.
@Test(timeout = 100000)
public void testNodeBlacklistingOnAMFailure() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = startRM(conf, dispatcher);
CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
// Register 5 nodes, so that we can blacklist atleast one if AM container
// is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1.
// First register 2 nodes, and after AM lauched register 3 more nodes.
MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService());
nm1.registerNode();
MockNM nm2 = new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService());
nm2.registerNode();
RMApp app = rm.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm1);
ContainerId amContainerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
RMContainer rmContainer = scheduler.getRMContainer(amContainerId);
NodeId nodeWhereAMRan = rmContainer.getAllocatedNode();
MockNM currentNode, otherNode;
if (nodeWhereAMRan.equals(nm1.getNodeId())) {
currentNode = nm1;
otherNode = nm2;
} else {
currentNode = nm2;
otherNode = nm1;
}
// register 3 nodes now
MockNM nm3 = new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService());
nm3.registerNode();
MockNM nm4 = new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService());
nm4.registerNode();
MockNM nm5 = new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService());
nm5.registerNode();
// Set the exist status to INVALID so that we can verify that the system
// automatically blacklisting the node
makeAMContainerExit(rm, amContainerId, currentNode, ContainerExitStatus.INVALID);
// restart the am
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm);
System.out.println("New AppAttempt launched " + attempt.getAppAttemptId());
// Try the current node a few times
for (int i = 0; i <= 2; i++) {
currentNode.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals("AppAttemptState should still be SCHEDULED if currentNode is " + "blacklisted correctly", RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
}
// Now try the other node
otherNode.nodeHeartbeat(true);
dispatcher.await();
// Now the AM container should be allocated
MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000);
MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId());
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
amContainerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
rmContainer = scheduler.getRMContainer(amContainerId);
nodeWhereAMRan = rmContainer.getAllocatedNode();
// The other node should now receive the assignment
Assert.assertEquals("After blacklisting, AM should have run on the other node", otherNode.getNodeId(), nodeWhereAMRan);
am2.registerAppAttempt();
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
List<Container> allocatedContainers = TestAMRestart.allocateContainers(currentNode, am2, 1);
Assert.assertEquals("Even though AM is blacklisted from the node, application can " + "still allocate non-AM containers there", currentNode.getNodeId(), allocatedContainers.get(0).getNodeId());
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestApplicationCleanup method testContainerCleanupWhenRMRestartedAppNotRegistered.
@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testContainerCleanupWhenRMRestartedAppNotRegistered() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// start RM
final DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm1 = new MockRM(conf, memStore) {
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create app and launch the AM
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
rm1.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
// start new RM
final DrainDispatcher dispatcher2 = new DrainDispatcher();
MockRM rm2 = new MockRM(conf, memStore) {
@Override
protected Dispatcher createDispatcher() {
return dispatcher2;
}
};
rm2.start();
// nm1 register to rm2, and do a heartbeat
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm1.registerNode(Arrays.asList(app0.getApplicationId()));
rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
// Add unknown container for application unknown to scheduler
NodeHeartbeatResponse response = nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 2, ContainerState.RUNNING);
waitForContainerCleanup(dispatcher2, nm1, response);
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestLogAggregationService method createDispatcher.
private DrainDispatcher createDispatcher() {
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(this.conf);
dispatcher.start();
return dispatcher;
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestNonAggregatingLogHandler method createDispatcher.
private DrainDispatcher createDispatcher(Configuration conf) {
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(conf);
dispatcher.start();
return dispatcher;
}
Aggregations