use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestNodeLabelContainerAllocation method testPreferenceOfNeedyAppsTowardsNodePartitions.
@Test
public void testPreferenceOfNeedyAppsTowardsNodePartitions() throws Exception {
/**
* Test case: Submit two application to a queue (app1 first then app2), app1
* asked for no-label, app2 asked for label=x, when node1 has label=x
* doing heart beat, app2 will get allocation first, even if app2 submits later
* than app1
*/
// set node -> label
mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y", false)));
mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("y")));
// inject node label manager
MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {
@Override
public RMNodeLabelsManager createNodeLabelManager() {
return mgr;
}
};
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
// label = y
MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
// label = <empty>
MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB);
// launch an app to queue b1 (label = y), AM container should be launched in nm2
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
// launch another app to queue b1 (label = y), AM container should be launched in nm2
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// request container and nm1 do heartbeat (nm2 has label=y), note that app1
// request non-labeled container, and app2 request labeled container, app2
// will get allocated first even if app1 submitted first.
am1.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>());
am2.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>(), "y");
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// Do node heartbeats many times
for (int i = 0; i < 50; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App2 will get preference to be allocated on node1, and node1 will be all
// used by App2.
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// app1 get nothing in nm1 (partition=y)
checkNumOfContainersInAnAppOnGivenNode(0, nm1.getNodeId(), schedulerApp1);
checkNumOfContainersInAnAppOnGivenNode(9, nm2.getNodeId(), schedulerApp1);
// app2 get all resource in nm1 (partition=y)
checkNumOfContainersInAnAppOnGivenNode(8, nm1.getNodeId(), schedulerApp2);
checkNumOfContainersInAnAppOnGivenNode(1, nm2.getNodeId(), schedulerApp2);
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestApplicationLifetimeMonitor method testApplicationLifetimeMonitor.
@Test(timeout = 60000)
public void testApplicationLifetimeMonitor() throws Exception {
MockRM rm = null;
try {
rm = new MockRM(conf);
rm.start();
Priority appPriority = Priority.newInstance(0);
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 16 * 1024);
Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
timeouts.put(ApplicationTimeoutType.LIFETIME, 10L);
RMApp app1 = rm.submitApp(1024, appPriority, timeouts);
// 20L seconds
timeouts.put(ApplicationTimeoutType.LIFETIME, 20L);
RMApp app2 = rm.submitApp(1024, appPriority, timeouts);
nm1.nodeHeartbeat(true);
// Send launch Event
MockAM am1 = rm.sendAMLaunched(app1.getCurrentAppAttempt().getAppAttemptId());
am1.registerAppAttempt();
rm.waitForState(app1.getApplicationId(), RMAppState.KILLED);
Assert.assertTrue("Application killed before lifetime value", (System.currentTimeMillis() - app1.getSubmitTime()) > 10000);
Map<ApplicationTimeoutType, String> updateTimeout = new HashMap<ApplicationTimeoutType, String>();
long newLifetime = 10L;
// update 10L seconds more to timeout
String formatISO8601 = Times.formatISO8601(System.currentTimeMillis() + newLifetime * 1000);
updateTimeout.put(ApplicationTimeoutType.LIFETIME, formatISO8601);
UpdateApplicationTimeoutsRequest request = UpdateApplicationTimeoutsRequest.newInstance(app2.getApplicationId(), updateTimeout);
Map<ApplicationTimeoutType, Long> applicationTimeouts = app2.getApplicationTimeouts();
// has old timeout time
long beforeUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
// update app2 lifetime to new time i.e now + timeout
rm.getRMContext().getClientRMService().updateApplicationTimeouts(request);
applicationTimeouts = app2.getApplicationTimeouts();
long afterUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
Assert.assertTrue("Application lifetime value not updated", afterUpdate > beforeUpdate);
// verify for application report.
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
GetApplicationReportRequest appRequest = recordFactory.newRecordInstance(GetApplicationReportRequest.class);
appRequest.setApplicationId(app2.getApplicationId());
Map<ApplicationTimeoutType, ApplicationTimeout> appTimeouts = rm.getRMContext().getClientRMService().getApplicationReport(appRequest).getApplicationReport().getApplicationTimeouts();
Assert.assertTrue("Application Timeout are empty.", !appTimeouts.isEmpty());
ApplicationTimeout timeout = appTimeouts.get(ApplicationTimeoutType.LIFETIME);
Assert.assertEquals("Application timeout string is incorrect.", formatISO8601, timeout.getExpiryTime());
Assert.assertTrue("Application remaining time is incorrect", timeout.getRemainingTime() > 0);
rm.waitForState(app2.getApplicationId(), RMAppState.KILLED);
// verify for app killed with updated lifetime
Assert.assertTrue("Application killed before lifetime value", app2.getFinishTime() > afterUpdate);
} finally {
stopRM(rm);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestApplicationLifetimeMonitor method testApplicationLifetimeOnRMRestart.
@Test(timeout = 180000)
public void testApplicationLifetimeOnRMRestart() throws Exception {
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
nm1.nodeHeartbeat(true);
long appLifetime = 60L;
Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime);
RMApp app1 = rm1.submitApp(200, Priority.newInstance(0), timeouts);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// Re-start RM
MockRM rm2 = new MockRM(conf, memStore);
// make sure app has been unregistered with old RM else both will trigger
// Expire event
rm1.getRMContext().getRMAppLifetimeMonitor().unregisterApp(app1.getApplicationId(), ApplicationTimeoutType.LIFETIME);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
NMContainerStatus amContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING);
NMContainerStatus runningContainer = TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
nm1.registerNode(Arrays.asList(amContainer, runningContainer), null);
// Wait for RM to settle down on recovering containers;
TestWorkPreservingRMRestart.waitForNumContainersToRecover(2, rm2, am1.getApplicationAttemptId());
Set<ContainerId> launchedContainers = ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())).getLaunchedContainers();
assertTrue(launchedContainers.contains(amContainer.getContainerId()));
assertTrue(launchedContainers.contains(runningContainer.getContainerId()));
// check RMContainers are re-recreated and the container state is correct.
rm2.waitForState(nm1, amContainer.getContainerId(), RMContainerState.RUNNING);
rm2.waitForState(nm1, runningContainer.getContainerId(), RMContainerState.RUNNING);
// re register attempt to rm2
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.ACCEPTED);
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am1.registerAppAttempt();
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.RUNNING);
// wait for app life time and application to be in killed state.
rm2.waitForState(recoveredApp1.getApplicationId(), RMAppState.KILLED);
Assert.assertTrue("Application killed before lifetime value", recoveredApp1.getFinishTime() > (recoveredApp1.getSubmitTime() + appLifetime * 1000));
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestNMReconnect method testRMNodeStatusAfterReconnect.
@Test(timeout = 10000)
public void testRMNodeStatusAfterReconnect() throws Exception {
// The node(127.0.0.1:1234) reconnected with RM. When it registered with
// RM, RM set its lastNodeHeartbeatResponse's id to 0 asynchronously. But
// the node's heartbeat come before RM succeeded setting the id to 0.
final DrainDispatcher dispatcher = new DrainDispatcher();
MockRM rm = new MockRM() {
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
nm1.registerNode();
int i = 0;
while (i < 3) {
nm1.nodeHeartbeat(true);
dispatcher.await();
i++;
}
MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
nm2.registerNode();
RMNode rmNode = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
nm2.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals("Node is Not in Running state.", NodeState.RUNNING, rmNode.getState());
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockNM in project hadoop by apache.
the class TestNMReconnect method testDecommissioningNodeReconnect.
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testDecommissioningNodeReconnect() throws Exception {
MockRM rm = new MockRM();
rm.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
nm1.registerNode();
rm.waitForState(nm1.getNodeId(), NodeState.RUNNING);
rm.getRMContext().getDispatcher().getEventHandler().handle(new RMNodeEvent(nm1.getNodeId(), RMNodeEventType.GRACEFUL_DECOMMISSION));
rm.waitForState(nm1.getNodeId(), NodeState.DECOMMISSIONING);
MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService());
RegisterNodeManagerResponse response = nm2.registerNode();
// not SHUTDOWN
Assert.assertTrue(response.getNodeAction().equals(NodeAction.NORMAL));
rm.stop();
}
Aggregations