Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.
The class ResourceTrackerService, method registerNodeManager.
@SuppressWarnings("unchecked")
@Override
public RegisterNodeManagerResponse registerNodeManager(
    RegisterNodeManagerRequest request) throws YarnException, IOException {
  NodeId nodeId = request.getNodeId();
  String host = nodeId.getHost();
  int cmPort = nodeId.getPort();
  int httpPort = request.getHttpPort();
  Resource capability = request.getResource();
  String nodeManagerVersion = request.getNMVersion();
  Resource physicalResource = request.getPhysicalResource();
  RegisterNodeManagerResponse response =
      recordFactory.newRecordInstance(RegisterNodeManagerResponse.class);
  if (!minimumNodeManagerVersion.equals("NONE")) {
    if (minimumNodeManagerVersion.equals("EqualToRM")) {
      minimumNodeManagerVersion = YarnVersionInfo.getVersion();
    }
    if ((nodeManagerVersion == null)
        || (VersionUtil.compareVersions(nodeManagerVersion, minimumNodeManagerVersion)) < 0) {
      String message = "Disallowed NodeManager Version " + nodeManagerVersion
          + ", is less than the minimum version " + minimumNodeManagerVersion
          + " sending SHUTDOWN signal to " + "NodeManager.";
      LOG.info(message);
      response.setDiagnosticsMessage(message);
      response.setNodeAction(NodeAction.SHUTDOWN);
      return response;
    }
  }
  // Check if this node is a 'valid' node
  if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) {
    String message = "Disallowed NodeManager from " + host
        + ", Sending SHUTDOWN signal to the NodeManager.";
    LOG.info(message);
    response.setDiagnosticsMessage(message);
    response.setNodeAction(NodeAction.SHUTDOWN);
    return response;
  }
  // Check if the node's capacity is loaded from dynamic-resources.xml
  String nid = nodeId.toString();
  Resource dynamicLoadCapability = loadNodeResourceFromDRConfiguration(nid);
  if (dynamicLoadCapability != null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Resource for node: " + nid + " is adjusted from: " + capability
          + " to: " + dynamicLoadCapability
          + " due to settings in dynamic-resources.xml.");
    }
    capability = dynamicLoadCapability;
    // sync back with new resource.
    response.setResource(capability);
  }
  // Check if this node has minimum allocations
  if (capability.getMemorySize() < minAllocMb
      || capability.getVirtualCores() < minAllocVcores) {
    String message = "NodeManager from " + host
        + " doesn't satisfy minimum allocations, Sending SHUTDOWN"
        + " signal to the NodeManager.";
    LOG.info(message);
    response.setDiagnosticsMessage(message);
    response.setNodeAction(NodeAction.SHUTDOWN);
    return response;
  }
  response.setContainerTokenMasterKey(containerTokenSecretManager.getCurrentKey());
  response.setNMTokenMasterKey(nmTokenSecretManager.getCurrentKey());
  RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort,
      resolve(host), capability, nodeManagerVersion, physicalResource);
  RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
  if (oldNode == null) {
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeStartedEvent(nodeId, request.getNMContainerStatuses(),
            request.getRunningApplications()));
  } else {
    LOG.info("Reconnect from the node at: " + host);
    this.nmLivelinessMonitor.unregister(nodeId);
    // Reset heartbeat ID since node just restarted.
    oldNode.resetLastNodeHeartBeatResponse();
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeReconnectEvent(nodeId, rmNode, request.getRunningApplications(),
            request.getNMContainerStatuses()));
  }
  // On every node manager register we will be clearing NMToken keys if
  // present for any running application.
  this.nmTokenSecretManager.removeNodeKey(nodeId);
  this.nmLivelinessMonitor.register(nodeId);
  // RMNode inserted
  if (!rmContext.isWorkPreservingRecoveryEnabled()) {
    if (!request.getNMContainerStatuses().isEmpty()) {
      LOG.info("received container statuses on node manager register :"
          + request.getNMContainerStatuses());
      for (NMContainerStatus status : request.getNMContainerStatuses()) {
        handleNMContainerStatus(status, nodeId);
      }
    }
  }
  // Update node's labels to RM's NodeLabelManager.
  Set<String> nodeLabels = NodeLabelsUtils.convertToStringSet(request.getNodeLabels());
  if (isDistributedNodeLabelsConf && nodeLabels != null) {
    try {
      updateNodeLabelsFromNMReport(nodeLabels, nodeId);
      response.setAreNodeLabelsAcceptedByRM(true);
    } catch (IOException ex) {
      // Ensure the exception is captured in the response
      response.setDiagnosticsMessage(ex.getMessage());
      response.setAreNodeLabelsAcceptedByRM(false);
    }
  } else if (isDelegatedCentralizedNodeLabelsConf) {
    this.rmContext.getRMDelegatedNodeLabelsUpdater().updateNodeLabels(nodeId);
  }
  StringBuilder message = new StringBuilder();
  message.append("NodeManager from node ").append(host).append("(cmPort: ")
      .append(cmPort).append(" httpPort: ");
  message.append(httpPort).append(") ").append("registered with capability: ")
      .append(capability);
  message.append(", assigned nodeId ").append(nodeId);
  if (response.getAreNodeLabelsAcceptedByRM()) {
    message.append(", node labels { ").append(StringUtils.join(",", nodeLabels) + " } ");
  }
  LOG.info(message.toString());
  response.setNodeAction(NodeAction.NORMAL);
  response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
  response.setRMVersion(YarnVersionInfo.getVersion());
  return response;
}
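The registration path above is where RMNodeStartedEvent enters the ResourceManager: a first-time registration dispatches it, while a re-registration of an already-known node dispatches RMNodeReconnectEvent instead. A minimal sketch of just that branch, assuming rmContext, rmNode, nodeId, and request are in scope exactly as in the method above:

RMNode oldNode = rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
if (oldNode == null) {
  // Unknown node: announce it to the dispatcher so RMNodeImpl can move it to RUNNING
  // (the tests below assert that transition).
  rmContext.getDispatcher().getEventHandler().handle(
      new RMNodeStartedEvent(nodeId, request.getNMContainerStatuses(),
          request.getRunningApplications()));
} else {
  // Known node re-registering (e.g. after an NM restart): reconnect instead of start.
  rmContext.getDispatcher().getEventHandler().handle(
      new RMNodeReconnectEvent(nodeId, rmNode, request.getRunningApplications(),
          request.getNMContainerStatuses()));
}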
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.
The class TestRMNodeTransitions, method testForHandlingDuplicatedCompltedContainers.
@Test
public void testForHandlingDuplicatedCompltedContainers() {
  // Start the node
  node.handle(new RMNodeStartedEvent(null, null, null));
  // Add info to the queue first
  node.setNextHeartBeat(false);
  ContainerId completedContainerId1 = BuilderUtils.newContainerId(
      BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
  RMNodeStatusEvent statusEvent1 = getMockRMNodeStatusEvent(null);
  ContainerStatus containerStatus1 = mock(ContainerStatus.class);
  doReturn(completedContainerId1).when(containerStatus1).getContainerId();
  doReturn(Collections.singletonList(containerStatus1)).when(statusEvent1).getContainers();
  verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
  node.handle(statusEvent1);
  verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
  Assert.assertEquals(1, node.getQueueSize());
  Assert.assertEquals(1, node.getCompletedContainers().size());
  // Test for duplicate entries
  node.handle(statusEvent1);
  Assert.assertEquals(1, node.getQueueSize());
  // Send clean up container event
  node.handle(new RMNodeFinishedContainersPulledByAMEvent(node.getNodeID(),
      Collections.singletonList(completedContainerId1)));
  NodeHeartbeatResponse hbrsp = Records.newRecord(NodeHeartbeatResponse.class);
  node.updateNodeHeartbeatResponseForCleanup(hbrsp);
  Assert.assertEquals(1, hbrsp.getContainersToBeRemovedFromNM().size());
  Assert.assertEquals(0, node.getCompletedContainers().size());
}
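In this test the node is started with new RMNodeStartedEvent(null, null, null); the three nulls stand in for the NodeId, the list of NMContainerStatus reports, and the list of running ApplicationIds that the registration code above passes in. A slightly fuller construction, purely illustrative and reusing the imports already available in TestRMNodeTransitions, would look like:

// Illustrative only: empty lists instead of nulls for the container reports and
// running applications carried by the start event.
List<NMContainerStatus> containerReports = Collections.emptyList();
List<ApplicationId> runningApplications = Collections.emptyList();
node.handle(new RMNodeStartedEvent(node.getNodeID(), containerReports, runningApplications));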
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.
The class TestRMNodeTransitions, method testStatusChange.
@Test(timeout = 5000)
public void testStatusChange() {
  // Start the node
  node.handle(new RMNodeStartedEvent(null, null, null));
  // Add info to the queue first
  node.setNextHeartBeat(false);
  ContainerId completedContainerId1 = BuilderUtils.newContainerId(
      BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
  ContainerId completedContainerId2 = BuilderUtils.newContainerId(
      BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(1, 1), 1), 1);
  RMNodeStatusEvent statusEvent1 = getMockRMNodeStatusEvent(null);
  RMNodeStatusEvent statusEvent2 = getMockRMNodeStatusEvent(null);
  ContainerStatus containerStatus1 = mock(ContainerStatus.class);
  ContainerStatus containerStatus2 = mock(ContainerStatus.class);
  doReturn(completedContainerId1).when(containerStatus1).getContainerId();
  doReturn(Collections.singletonList(containerStatus1)).when(statusEvent1).getContainers();
  doReturn(completedContainerId2).when(containerStatus2).getContainerId();
  doReturn(Collections.singletonList(containerStatus2)).when(statusEvent2).getContainers();
  verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
  node.handle(statusEvent1);
  node.handle(statusEvent2);
  verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
  Assert.assertEquals(2, node.getQueueSize());
  node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
  Assert.assertEquals(0, node.getQueueSize());
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.
The class TestRMNodeTransitions, method getRunningNode.
private RMNodeImpl getRunningNode(String nmVersion, int port) {
  NodeId nodeId = BuilderUtils.newNodeId("localhost", port);
  Resource capability = Resource.newInstance(4096, 4);
  RMNodeImpl node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, capability, nmVersion);
  node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null));
  Assert.assertEquals(NodeState.RUNNING, node.getState());
  return node;
}
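The helper above drives a freshly constructed RMNodeImpl to RUNNING purely by handling an RMNodeStartedEvent, which makes it a convenient starting point for further transition tests. A hypothetical usage sketch (the decommission step is illustrative, not taken from this page):

// Hypothetical: take a RUNNING node and decommission it.
RMNodeImpl node = getRunningNode(null, 0);
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.DECOMMISSION));
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());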
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.
The class TestRMNodeTransitions, method testAdd.
@Test
public void testAdd() {
  RMNodeImpl node = getNewNode();
  ClusterMetrics cm = ClusterMetrics.getMetrics();
  int initialActive = cm.getNumActiveNMs();
  int initialLost = cm.getNumLostNMs();
  int initialUnhealthy = cm.getUnhealthyNMs();
  int initialDecommissioned = cm.getNumDecommisionedNMs();
  int initialRebooted = cm.getNumRebootedNMs();
  node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null));
  Assert.assertEquals("Active Nodes", initialActive + 1, cm.getNumActiveNMs());
  Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
  Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, cm.getUnhealthyNMs());
  Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, cm.getNumDecommisionedNMs());
  Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs());
  Assert.assertEquals(NodeState.RUNNING, node.getState());
  Assert.assertNotNull(nodesListManagerEvent);
  Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
}
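testAdd relies on a getNewNode() helper that is not reproduced on this page. Based on the getRunningNode helper shown above, a plausible shape for it is sketched below; this is illustrative only and may differ from the project's actual code.

// Illustrative sketch of a node-construction helper analogous to getRunningNode above;
// it builds an RMNodeImpl in the NEW state without handling any start event.
private RMNodeImpl getNewNode() {
  NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
  return new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null);
}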