use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.
the class TestResourceTrackerService method testNodeHeartbeatForAppCollectorsMap.
@Test
public void testNodeHeartbeatForAppCollectorsMap() throws Exception {
Configuration conf = new Configuration();
conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
// set version to 2
conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);
// enable aux-service based timeline collectors
conf.set(YarnConfiguration.NM_AUX_SERVICES, "timeline_collector");
conf.set(YarnConfiguration.NM_AUX_SERVICES + "." + "timeline_collector" + ".class", PerNodeTimelineCollectorsAuxService.class.getName());
conf.setClass(YarnConfiguration.TIMELINE_SERVICE_WRITER_CLASS, FileSystemTimelineWriterImpl.class, TimelineWriter.class);
rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("host1:1234", 5120);
MockNM nm2 = rm.registerNode("host2:1234", 2048);
NodeHeartbeatResponse nodeHeartbeat1 = nm1.nodeHeartbeat(true);
NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true);
RMNodeImpl node1 = (RMNodeImpl) rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNodeImpl node2 = (RMNodeImpl) rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMApp app1 = rm.submitApp(1024);
String collectorAddr1 = "1.2.3.4:5";
app1.setCollectorAddr(collectorAddr1);
String collectorAddr2 = "5.4.3.2:1";
RMApp app2 = rm.submitApp(1024);
app2.setCollectorAddr(collectorAddr2);
// Create a running container for app1 running on nm1
ContainerId runningContainerId1 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(app1.getApplicationId(), 0), 0);
ContainerStatus status1 = ContainerStatus.newInstance(runningContainerId1, ContainerState.RUNNING, "", 0);
List<ContainerStatus> statusList = new ArrayList<ContainerStatus>();
statusList.add(status1);
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(true, "", System.currentTimeMillis());
NodeStatus nodeStatus = NodeStatus.newInstance(nm1.getNodeId(), 0, statusList, null, nodeHealth, null, null, null);
node1.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeStatus, nodeHeartbeat1));
Assert.assertEquals(1, node1.getRunningApps().size());
Assert.assertEquals(app1.getApplicationId(), node1.getRunningApps().get(0));
// Create a running container for app2 running on nm2
ContainerId runningContainerId2 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(app2.getApplicationId(), 0), 0);
ContainerStatus status2 = ContainerStatus.newInstance(runningContainerId2, ContainerState.RUNNING, "", 0);
statusList = new ArrayList<ContainerStatus>();
statusList.add(status2);
nodeStatus = NodeStatus.newInstance(nm1.getNodeId(), 0, statusList, null, nodeHealth, null, null, null);
node2.handle(new RMNodeStatusEvent(nm2.getNodeId(), nodeStatus, nodeHeartbeat2));
Assert.assertEquals(1, node2.getRunningApps().size());
Assert.assertEquals(app2.getApplicationId(), node2.getRunningApps().get(0));
nodeHeartbeat1 = nm1.nodeHeartbeat(true);
Map<ApplicationId, String> map1 = nodeHeartbeat1.getAppCollectorsMap();
Assert.assertEquals(1, map1.size());
Assert.assertEquals(collectorAddr1, map1.get(app1.getApplicationId()));
nodeHeartbeat2 = nm2.nodeHeartbeat(true);
Map<ApplicationId, String> map2 = nodeHeartbeat2.getAppCollectorsMap();
Assert.assertEquals(1, map2.size());
Assert.assertEquals(collectorAddr2, map2.get(app2.getApplicationId()));
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.
the class TestResourceTrackerService method testGetNextHeartBeatInterval.
/**
* Test RM read NM next heartBeat Interval correctly from Configuration file,
* and NM get next heartBeat Interval from RM correctly
*/
@Test(timeout = 50000)
public void testGetNextHeartBeatInterval() throws Exception {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, "4000");
rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("host1:1234", 5120);
MockNM nm2 = rm.registerNode("host2:5678", 10240);
NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
Assert.assertEquals(4000, nodeHeartbeat.getNextHeartBeatInterval());
NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true);
Assert.assertEquals(4000, nodeHeartbeat2.getNextHeartBeatInterval());
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.
the class TestResourceTrackerService method testAddNewExcludePathToConfiguration.
/**
* Decommissioning using a post-configured exclude hosts file
*/
@Test
public void testAddNewExcludePathToConfiguration() throws Exception {
Configuration conf = new Configuration();
rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("host1:1234", 5120);
MockNM nm2 = rm.registerNode("host2:5678", 10240);
ClusterMetrics metrics = ClusterMetrics.getMetrics();
assert (metrics != null);
int initialMetricCount = metrics.getNumDecommisionedNMs();
NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
nodeHeartbeat = nm2.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
writeToHostsFile("host2");
conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
rm.getNodesListManager().refreshNodes(conf);
checkDecommissionedNMCount(rm, ++initialMetricCount);
nodeHeartbeat = nm1.nodeHeartbeat(true);
Assert.assertEquals("Node should not have been decomissioned.", NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
nodeHeartbeat = nm2.nodeHeartbeat(true);
Assert.assertEquals("Node should have been decomissioned but is in state" + nodeHeartbeat.getNodeAction(), NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.
the class TestResourceTrackerService method testGracefulDecommissionWithApp.
/**
* Graceful decommission node with running application.
*/
@Test
public void testGracefulDecommissionWithApp() throws Exception {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
writeToHostsFile("");
rm = new MockRM(conf);
rm.start();
MockNM nm1 = rm.registerNode("host1:1234", 10240);
MockNM nm2 = rm.registerNode("host2:5678", 20480);
MockNM nm3 = rm.registerNode("host3:4433", 10240);
NodeId id1 = nm1.getNodeId();
NodeId id3 = nm3.getNodeId();
rm.waitForState(id1, NodeState.RUNNING);
rm.waitForState(id3, NodeState.RUNNING);
// Create an app and launch two containers on host1.
RMApp app = rm.submitApp(2000);
MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
ApplicationAttemptId aaid = app.getCurrentAppAttempt().getAppAttemptId();
nm1.nodeHeartbeat(aaid, 2, ContainerState.RUNNING);
nm3.nodeHeartbeat(true);
// Graceful decommission host1 and host3
writeToHostsFile("host1", "host3");
rm.getNodesListManager().refreshNodes(conf, true);
rm.waitForState(id1, NodeState.DECOMMISSIONING);
rm.waitForState(id3, NodeState.DECOMMISSIONING);
// host1 should be DECOMMISSIONING due to running containers.
// host3 should become DECOMMISSIONED.
nm1.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
rm.waitForState(id1, NodeState.DECOMMISSIONING);
rm.waitForState(id3, NodeState.DECOMMISSIONED);
nm1.nodeHeartbeat(aaid, 2, ContainerState.RUNNING);
// Complete containers on host1.
// Since the app is still RUNNING, expect NodeAction.NORMAL.
NodeHeartbeatResponse nodeHeartbeat1 = nm1.nodeHeartbeat(aaid, 2, ContainerState.COMPLETE);
Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat1.getNodeAction());
// Finish the app and verified DECOMMISSIONED.
MockRM.finishAMAndVerifyAppState(app, rm, nm1, am);
rm.waitForState(app.getApplicationId(), RMAppState.FINISHED);
nodeHeartbeat1 = nm1.nodeHeartbeat(aaid, 2, ContainerState.COMPLETE);
Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat1.getNodeAction());
rm.waitForState(id1, NodeState.DECOMMISSIONED);
}
use of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse in project hadoop by apache.
the class TestResourceTrackerService method testNodeRemovalUtilUnhealthy.
private void testNodeRemovalUtilUnhealthy(boolean doGraceful) throws Exception {
Configuration conf = new Configuration();
int timeoutValue = 500;
conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath());
conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, excludeHostFile.getAbsolutePath());
writeToHostsFile(hostFile, "host1", "localhost", "host2");
writeToHostsFile(excludeHostFile, "");
conf.setInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, timeoutValue);
rm = new MockRM(conf);
rm.init(conf);
rm.start();
RMContext rmContext = rm.getRMContext();
refreshNodesOption(doGraceful, conf);
MockNM nm1 = rm.registerNode("host1:1234", 5120);
MockNM nm2 = rm.registerNode("host2:5678", 10240);
MockNM nm3 = rm.registerNode("localhost:4433", 1024);
ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();
ClusterMetrics metrics = clusterMetrics;
assert (metrics != null);
rm.drainEvents();
//check all 3 nodes joined in as NORMAL
NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
nodeHeartbeat = nm2.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
nodeHeartbeat = nm3.nodeHeartbeat(true);
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
rm.drainEvents();
Assert.assertEquals("All 3 nodes should be active", metrics.getNumActiveNMs(), 3);
// node healthy
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(false);
nm3.nodeHeartbeat(true);
checkUnhealthyNMCount(rm, nm2, true, 1);
writeToHostsFile(hostFile, "host1", "localhost");
writeToHostsFile(excludeHostFile, "");
refreshNodesOption(doGraceful, conf);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(false);
nm3.nodeHeartbeat(true);
rm.drainEvents();
if (!doGraceful) {
Assert.assertNotEquals("host2 should be a shutdown NM!", rmContext.getInactiveRMNodes().get(nm2.getNodeId()), null);
Assert.assertEquals("host2 should be a shutdown NM!", rmContext.getInactiveRMNodes().get(nm2.getNodeId()).getState(), NodeState.SHUTDOWN);
}
Assert.assertEquals("There should be 2 Active NM!", clusterMetrics.getNumActiveNMs(), 2);
if (!doGraceful) {
Assert.assertEquals("There should be 1 Shutdown NM!", clusterMetrics.getNumShutdownNMs(), 1);
}
Assert.assertEquals("There should be 0 Unhealthy NM!", clusterMetrics.getUnhealthyNMs(), 0);
int nodeRemovalTimeout = conf.getInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, YarnConfiguration.DEFAULT_RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC);
int nodeRemovalInterval = rmContext.getNodesListManager().getNodeRemovalCheckInterval();
long maxThreadSleeptime = nodeRemovalInterval + nodeRemovalTimeout;
int waitCount = 0;
while (rmContext.getInactiveRMNodes().get(nm2.getNodeId()) != null && waitCount++ < 2) {
synchronized (this) {
wait(maxThreadSleeptime);
}
}
Assert.assertEquals("host2 should have been forgotten!", rmContext.getInactiveRMNodes().get(nm2.getNodeId()), null);
Assert.assertEquals("There should be no Shutdown NMs!", clusterMetrics.getNumRebootedNMs(), 0);
Assert.assertEquals("There should be 2 Active NM!", clusterMetrics.getNumActiveNMs(), 2);
rm.stop();
}
Aggregations