Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in the Apache Hadoop project.
From class NodesListManager, method setDecomissionedNMs.
// For each host returned by hostsReader.getExcludedHosts(), builds a
// placeholder RMNodeImpl under an "unknown" node id, stores it in the
// context's inactive-node map and then hands it a DECOMMISSION event.
private void setDecomissionedNMs() {
  for (final String excludedHost : hostsReader.getExcludedHosts()) {
    NodeId placeholderId = createUnknownNodeId(excludedHost);

    // The real ports, topology and capacity of the host are not known here,
    // so the placeholder carries -1 ports, an UnknownNode and a zero resource.
    UnknownNode unknownTopology = new UnknownNode(excludedHost);
    Resource zeroCapability = Resource.newInstance(0, 0);
    RMNodeImpl placeholder = new RMNodeImpl(
        placeholderId, rmContext, excludedHost, -1, -1,
        unknownTopology, zeroCapability, "unknown");

    // Store the node as inactive before delivering the event to it.
    rmContext.getInactiveRMNodes().put(placeholderId, placeholder);
    placeholder.handle(
        new RMNodeEvent(placeholderId, RMNodeEventType.DECOMMISSION));
  }
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in the Apache Hadoop project.
From class NodesListManager, method handleExcludeNodeList.
// Reconciles every node in rmContext.getRMNodes() with the include/exclude
// host details read from hostsReader. A node counts as excluded when
// isValidNode() rejects its host (explicitly excluded, or not included).
//
//  * Not excluded: a DECOMMISSIONING node is recommissioned; nodes in any
//    other state (including DECOMMISSIONED) are left alone.
//  * Excluded, graceful request: a node that is neither DECOMMISSIONED nor
//    DECOMMISSIONING is gracefully decommissioned; a DECOMMISSIONING node
//    whose current timeout differs from the timeout now in effect is sent
//    the new timeout; otherwise nothing is done.
//  * Excluded, forceful request: every node that is not yet DECOMMISSIONED
//    is decommissioned (SHUTDOWN is sent instead when isUntrackedNode()
//    reports the host as untracked).
//
// The timeout in effect for a node is the per-host value from the exclude
// details when one exists, otherwise the request-level 'timeout' argument.
// All nodes are classified first; events are dispatched afterwards
// (recommission events before decommission events), and finally
// updateInactiveNodes() is invoked.
private void handleExcludeNodeList(boolean graceful, Integer timeout) {
  Set<String> includes = new HashSet<>();
  Map<String, Integer> excludes = new HashMap<>();
  hostsReader.getHostDetails(includes, excludes);

  // Nodes to bring back into service.
  List<RMNode> recommissionTargets = new ArrayList<>();
  // Nodes to take out of service (gracefully or forcefully).
  List<RMNode> decommissionTargets = new ArrayList<>();

  for (RMNode node : this.rmContext.getRMNodes().values()) {
    NodeState state = node.getState();
    boolean valid =
        isValidNode(node.getHostName(), includes, excludes.keySet());
    String description = "node " + node.getNodeID() + " with state " + state;

    if (valid) {
      // Only a node that is still draining can be pulled back.
      if (state == NodeState.DECOMMISSIONING) {
        LOG.info("Recommission " + description);
        recommissionTargets.add(node);
      }
      continue;
    }

    if (!graceful) {
      if (state != NodeState.DECOMMISSIONED) {
        LOG.info("Forcefully decommission " + description);
        decommissionTargets.add(node);
      }
      continue;
    }

    // Graceful path: the per-host timeout wins over the request timeout.
    Integer effectiveTimeout = excludes.get(node.getHostName());
    if (effectiveTimeout == null) {
      effectiveTimeout = timeout;
    }

    boolean alreadyLeaving = state == NodeState.DECOMMISSIONED
        || state == NodeState.DECOMMISSIONING;
    if (!alreadyLeaving) {
      LOG.info("Gracefully decommission " + description);
      decommissionTargets.add(node);
    } else if (state == NodeState.DECOMMISSIONING
        && !Objects.equals(node.getDecommissioningTimeout(), effectiveTimeout)) {
      LOG.info("Update " + description + " timeout to be " + effectiveTimeout);
      decommissionTargets.add(node);
    } else {
      LOG.info("No action for " + description);
    }
  }

  for (RMNode node : recommissionTargets) {
    RMNodeEvent recommission =
        new RMNodeEvent(node.getNodeID(), RMNodeEventType.RECOMMISSION);
    this.rmContext.getDispatcher().getEventHandler().handle(recommission);
  }

  for (RMNode node : decommissionTargets) {
    final RMNodeEvent decommission;
    if (!graceful) {
      RMNodeEventType type = RMNodeEventType.DECOMMISSION;
      if (isUntrackedNode(node.getHostName())) {
        type = RMNodeEventType.SHUTDOWN;
      }
      decommission = new RMNodeEvent(node.getNodeID(), type);
    } else {
      // Same timeout resolution as in the classification pass above.
      Integer effectiveTimeout = excludes.get(node.getHostName());
      if (effectiveTimeout == null) {
        effectiveTimeout = timeout;
      }
      decommission =
          new RMNodeDecommissioningEvent(node.getNodeID(), effectiveTimeout);
    }
    this.rmContext.getDispatcher().getEventHandler().handle(decommission);
  }

  updateInactiveNodes();
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in the Apache Hadoop project.
From class MockRM, method sendNodeLost.
/**
 * Delivers an EXPIRE event directly to the RM node registered under the
 * given mock NM's node id, then calls {@code drainEventsImplicitly()}.
 *
 * @param nm mock node manager whose node id selects the target RM node
 * @throws Exception declared by this method; see {@code drainEventsImplicitly()}
 */
public void sendNodeLost(MockNM nm) throws Exception {
  RMNodeImpl expiredNode =
      (RMNodeImpl) getRMContext().getRMNodes().get(nm.getNodeId());
  RMNodeEvent expiry = new RMNodeEvent(nm.getNodeId(), RMNodeEventType.EXPIRE);
  // The event is handed to the node itself rather than sent via a dispatcher.
  expiredNode.handle(expiry);
  drainEventsImplicitly();
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in the Apache Hadoop project.
From class ResourceTrackerService, method unRegisterNodeManager.
/**
 * Handles an unregister request from a node manager.
 *
 * <p>If the requesting node id is not present in the context's RM node map,
 * the request is logged and otherwise ignored. For a known node, the node is
 * unregistered from the NM liveliness monitor and a SHUTDOWN event for it is
 * passed to the dispatcher's event handler. A freshly created response
 * record is returned in both cases.
 *
 * @param request unregister request carrying the node id
 * @return a new {@code UnRegisterNodeManagerResponse} record
 */
@SuppressWarnings("unchecked")
@Override
public UnRegisterNodeManagerResponse unRegisterNodeManager(UnRegisterNodeManagerRequest request) throws YarnException, IOException {
  UnRegisterNodeManagerResponse reply =
      recordFactory.newRecordInstance(UnRegisterNodeManagerResponse.class);
  NodeId departingId = request.getNodeId();
  boolean knownNode = this.rmContext.getRMNodes().get(departingId) != null;

  if (knownNode) {
    LOG.info("Node with node id : " + departingId + " has shutdown, hence unregistering the node.");
    this.nmLivelinessMonitor.unregister(departingId);
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeEvent(departingId, RMNodeEventType.SHUTDOWN));
  } else {
    LOG.info("Node not found, ignoring the unregister from node id : " + departingId);
  }
  return reply;
}
Use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in the Apache Hadoop project.
From class TestRMNodeTransitions, method testRunningRebooting.
/**
 * Sends a REBOOTING event to a running node and checks that the node ends up
 * in the REBOOTED state, that the active-node count drops by one, that the
 * rebooted-node count rises by one, and that the lost, unhealthy and
 * decommissioned counts are unchanged.
 */
@Test
public void testRunningRebooting() {
  RMNodeImpl runningNode = getRunningNode();
  ClusterMetrics metrics = ClusterMetrics.getMetrics();

  // Snapshot the counters so the assertions are relative to the start state.
  int activeBefore = metrics.getNumActiveNMs();
  int lostBefore = metrics.getNumLostNMs();
  int unhealthyBefore = metrics.getUnhealthyNMs();
  int decommissionedBefore = metrics.getNumDecommisionedNMs();
  int rebootedBefore = metrics.getNumRebootedNMs();

  RMNodeEvent rebooting =
      new RMNodeEvent(runningNode.getNodeID(), RMNodeEventType.REBOOTING);
  runningNode.handle(rebooting);

  // Exactly one node moves from "active" to "rebooted"; nothing else changes.
  Assert.assertEquals("Active Nodes", activeBefore - 1, metrics.getNumActiveNMs());
  Assert.assertEquals("Lost Nodes", lostBefore, metrics.getNumLostNMs());
  Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, metrics.getUnhealthyNMs());
  Assert.assertEquals("Decommissioned Nodes", decommissionedBefore, metrics.getNumDecommisionedNMs());
  Assert.assertEquals("Rebooted Nodes", rebootedBefore + 1, metrics.getNumRebootedNMs());
  Assert.assertEquals(NodeState.REBOOTED, runningNode.getState());
}
Aggregations