use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.
the class TestRM method testApplicationKillAtAcceptedState.
/**
* Validate killing an application when it is at accepted state.
* @throws Exception exception
*/
@Test(timeout = 60000)
public void testApplicationKillAtAcceptedState() throws Exception {
final Dispatcher dispatcher = new DrainDispatcher() {
@Override
public EventHandler<Event> getEventHandler() {
class EventArgMatcher extends ArgumentMatcher<AbstractEvent> {
@Override
public boolean matches(Object argument) {
if (argument instanceof RMAppAttemptEvent) {
if (((RMAppAttemptEvent) argument).getType().equals(RMAppAttemptEventType.KILL)) {
return true;
}
}
return false;
}
}
EventHandler handler = spy(super.getEventHandler());
doNothing().when(handler).handle(argThat(new EventArgMatcher()));
return handler;
}
};
MockRM rm = new MockRM(conf) {
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
// test metrics
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
int appsKilled = metrics.getAppsKilled();
int appsSubmitted = metrics.getAppsSubmitted();
rm.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
nm1.registerNode();
// a failed app
RMApp application = rm.submitApp(200);
MockAM am = MockRM.launchAM(application, rm, nm1);
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);
nm1.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.RUNNING);
rm.waitForState(application.getApplicationId(), RMAppState.ACCEPTED);
// Now kill the application before new attempt is launched, the app report
// returns the invalid AM host and port.
KillApplicationRequest request = KillApplicationRequest.newInstance(application.getApplicationId());
rm.getClientRMService().forceKillApplication(request);
// Specific test for YARN-1689 follows
// Now let's say a race causes AM to register now. This should not crash RM.
am.registerAppAttempt(false);
// We explicitly intercepted the kill-event to RMAppAttempt, so app should
// still be in KILLING state.
rm.waitForState(application.getApplicationId(), RMAppState.KILLING);
// AM should now be in running
rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// Simulate that appAttempt is killed.
rm.getRMContext().getDispatcher().getEventHandler().handle(new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_KILLED));
rm.waitForState(application.getApplicationId(), RMAppState.KILLED);
// test metrics
metrics = rm.getResourceScheduler().getRootQueueMetrics();
Assert.assertEquals(appsKilled + 1, metrics.getAppsKilled());
Assert.assertEquals(appsSubmitted + 1, metrics.getAppsSubmitted());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.
the class TestResourceTrackerService method testReconnectNode.
@Test
public void testReconnectNode() throws Exception {
rm = new MockRM() {
@Override
protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
return new EventDispatcher<SchedulerEvent>(this.scheduler, this.scheduler.getClass().getName()) {
@Override
public void handle(SchedulerEvent event) {
scheduler.handle(event);
}
};
}
};
rm.start();
MockNM nm1 = rm.registerNode("host1:1234", 5120);
MockNM nm2 = rm.registerNode("host2:5678", 5120);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(false);
rm.drainEvents();
checkUnhealthyNMCount(rm, nm2, true, 1);
final int expectedNMs = ClusterMetrics.getMetrics().getNumActiveNMs();
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// TODO Metrics incorrect in case of the FifoScheduler
Assert.assertEquals(5120, metrics.getAvailableMB());
// reconnect of healthy node
nm1 = rm.registerNode("host1:1234", 5120);
NodeHeartbeatResponse response = nm1.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
rm.drainEvents();
Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
checkUnhealthyNMCount(rm, nm2, true, 1);
// reconnect of unhealthy node
nm2 = rm.registerNode("host2:5678", 5120);
response = nm2.nodeHeartbeat(false);
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
rm.drainEvents();
Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
checkUnhealthyNMCount(rm, nm2, true, 1);
// unhealthy node changed back to healthy
nm2 = rm.registerNode("host2:5678", 5120);
response = nm2.nodeHeartbeat(true);
response = nm2.nodeHeartbeat(true);
rm.drainEvents();
Assert.assertEquals(5120 + 5120, metrics.getAvailableMB());
// reconnect of node with changed capability
nm1 = rm.registerNode("host2:5678", 10240);
response = nm1.nodeHeartbeat(true);
rm.drainEvents();
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
Assert.assertEquals(5120 + 10240, metrics.getAvailableMB());
// reconnect of node with changed capability and running applications
List<ApplicationId> runningApps = new ArrayList<ApplicationId>();
runningApps.add(ApplicationId.newInstance(1, 0));
nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps);
response = nm1.nodeHeartbeat(true);
rm.drainEvents();
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
// reconnect healthy node changing http port
nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService());
nm1.setHttpPort(3);
nm1.registerNode();
response = nm1.nodeHeartbeat(true);
response = nm1.nodeHeartbeat(true);
rm.drainEvents();
RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
Assert.assertEquals(3, rmNode.getHttpPort());
Assert.assertEquals(5120, rmNode.getTotalCapability().getMemorySize());
Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.
the class TestFairScheduler method testSimpleContainerAllocation.
@Test(timeout = 5000)
public void testSimpleContainerAllocation() throws IOException {
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add a node
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
// Add another node
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(512, 2), 2, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
createSchedulingRequest(512, 2, "queue1", "user1", 2);
scheduler.update();
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
scheduler.handle(updateEvent);
// Asked for less than increment allocation.
assertEquals(FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2);
scheduler.handle(updateEvent2);
assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
assertEquals(2, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getVirtualCores());
// verify metrics
QueueMetrics queue1Metrics = scheduler.getQueueManager().getQueue("queue1").getMetrics();
assertEquals(1024, queue1Metrics.getAllocatedMB());
assertEquals(2, queue1Metrics.getAllocatedVirtualCores());
assertEquals(1024, scheduler.getRootQueueMetrics().getAllocatedMB());
assertEquals(2, scheduler.getRootQueueMetrics().getAllocatedVirtualCores());
assertEquals(512, scheduler.getRootQueueMetrics().getAvailableMB());
assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.
the class TestFairScheduler method testReservationMetrics.
@Test
public void testReservationMetrics() throws IOException {
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
QueueMetrics metrics = scheduler.getRootQueueMetrics();
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent);
ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1);
createApplicationWithAMResource(appAttemptId, "default", "user1", null);
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
scheduler.update();
scheduler.handle(updateEvent);
createSchedulingRequestExistingApplication(1024, 1, 1, appAttemptId);
scheduler.update();
scheduler.handle(updateEvent);
// no reservation yet
assertEquals(0, metrics.getReservedContainers());
assertEquals(0, metrics.getReservedMB());
assertEquals(0, metrics.getReservedVirtualCores());
// create reservation of {4096, 4}
createSchedulingRequestExistingApplication(4096, 4, 1, appAttemptId);
scheduler.update();
scheduler.handle(updateEvent);
// reservation created
assertEquals(1, metrics.getReservedContainers());
assertEquals(4096, metrics.getReservedMB());
assertEquals(4, metrics.getReservedVirtualCores());
// remove AppAttempt
AppAttemptRemovedSchedulerEvent attRemoveEvent = new AppAttemptRemovedSchedulerEvent(appAttemptId, RMAppAttemptState.KILLED, false);
scheduler.handle(attRemoveEvent);
// The reservation metrics should be subtracted
assertEquals(0, metrics.getReservedContainers());
assertEquals(0, metrics.getReservedMB());
assertEquals(0, metrics.getReservedVirtualCores());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics in project hadoop by apache.
the class TestWorkPreservingRMRestart method checkFSQueue.
private void checkFSQueue(ResourceManager rm, SchedulerApplication schedulerApp, Resource usedResources, Resource availableResources) throws Exception {
// waiting for RM's scheduling apps
int retry = 0;
Resource assumedFairShare = Resource.newInstance(8192, 8);
while (true) {
Thread.sleep(100);
if (assumedFairShare.equals(((FairScheduler) rm.getResourceScheduler()).getQueueManager().getRootQueue().getFairShare())) {
break;
}
retry++;
if (retry > 30) {
Assert.fail("Apps are not scheduled within assumed timeout");
}
}
FairScheduler scheduler = (FairScheduler) rm.getResourceScheduler();
FSParentQueue root = scheduler.getQueueManager().getRootQueue();
// ************ check cluster used Resources ********
assertTrue(root.getPolicy() instanceof DominantResourceFairnessPolicy);
assertEquals(usedResources, root.getResourceUsage());
// ************ check app headroom ****************
FSAppAttempt schedulerAttempt = (FSAppAttempt) schedulerApp.getCurrentAppAttempt();
assertEquals(availableResources, schedulerAttempt.getHeadroom());
// ************ check queue metrics ****************
QueueMetrics queueMetrics = scheduler.getRootQueueMetrics();
assertMetrics(queueMetrics, 1, 0, 1, 0, 2, availableResources.getMemorySize(), availableResources.getVirtualCores(), usedResources.getMemorySize(), usedResources.getVirtualCores());
}
Aggregations