Search in sources:

Example 1 with RMNodeEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in project hadoop by apache.

the class NodesListManager method setDecomissionedNMs.

/**
 * Registers every host reported by {@code hostsReader.getExcludedHosts()} as an
 * inactive node and immediately sends that node a DECOMMISSION event.
 *
 * Each host gets a placeholder {@link RMNodeImpl} built from an "unknown" node id;
 * the placeholder is stored in the inactive-node map before the event is delivered.
 */
private void setDecomissionedNMs() {
    for (final String excludedHost : hostsReader.getExcludedHosts()) {
        final NodeId placeholderId = createUnknownNodeId(excludedHost);
        // The -1 values, the zero-sized resource and the "unknown" string are
        // placeholders (presumably ports and version -- confirm against the
        // RMNodeImpl constructor).
        final RMNodeImpl placeholderNode = new RMNodeImpl(placeholderId, rmContext, excludedHost, -1, -1, new UnknownNode(excludedHost), Resource.newInstance(0, 0), "unknown");
        rmContext.getInactiveRMNodes().put(placeholderId, placeholderNode);
        // The event is handed straight to the node rather than going through the dispatcher.
        placeholderNode.handle(new RMNodeEvent(placeholderId, RMNodeEventType.DECOMMISSION));
    }
}
Also used : NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)

Example 2 with RMNodeEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in project hadoop by apache.

the class NodesListManager method handleExcludeNodeList.

// Reconciles every node in rmContext.getRMNodes() with the current
// include/exclude host lists read from hostsReader:
//  - a node that is valid (not excluded) and currently DECOMMISSIONING is
//    recommissioned; any other valid node is left alone;
//  - an excluded node, in graceful mode, is gracefully decommissioned unless it
//    is already DECOMMISSIONED or DECOMMISSIONING; a DECOMMISSIONING node whose
//    timeout differs from the one now requested is re-sent with the new timeout;
//  - an excluded node, in forceful mode, is decommissioned unless it is
//    already DECOMMISSIONED.
// Events are dispatched only after the whole node set has been classified, and
// updateInactiveNodes() runs last.
//
// graceful: true for graceful decommission, false for forceful.
// timeout:  request-level timeout, used when the exclude entry for a host
//           carries no per-node timeout.
private void handleExcludeNodeList(boolean graceful, Integer timeout) {
    Set<String> includedHosts = new HashSet<>();
    Map<String, Integer> excludedHosts = new HashMap<>();
    hostsReader.getHostDetails(includedHosts, excludedHosts);

    // Nodes to bring back into service.
    List<RMNode> recommissionTargets = new ArrayList<>();
    // Nodes to take out of service (gracefully or forcefully).
    List<RMNode> decommissionTargets = new ArrayList<>();

    for (RMNode node : this.rmContext.getRMNodes().values()) {
        NodeState state = node.getState();
        // A node counts as excluded when it fails validation, whether because
        // it is explicitly excluded or because it is not included.
        boolean valid = isValidNode(node.getHostName(), includedHosts, excludedHosts.keySet());
        String nodeStr = "node " + node.getNodeID() + " with state " + state;

        if (valid) {
            // Only an in-progress decommission is reverted here.
            if (state == NodeState.DECOMMISSIONING) {
                LOG.info("Recommission " + nodeStr);
                recommissionTargets.add(node);
            }
            continue;
        }

        if (!graceful) {
            if (state != NodeState.DECOMMISSIONED) {
                LOG.info("Forcefully decommission " + nodeStr);
                decommissionTargets.add(node);
            }
            continue;
        }

        // Graceful path: a per-node timeout from the exclude list wins over
        // the request-level timeout.
        Integer perNodeTimeout = excludedHosts.get(node.getHostName());
        Integer timeoutToUse = (perNodeTimeout != null) ? perNodeTimeout : timeout;
        if (state == NodeState.DECOMMISSIONED) {
            LOG.info("No action for " + nodeStr);
        } else if (state == NodeState.DECOMMISSIONING) {
            if (!Objects.equals(node.getDecommissioningTimeout(), timeoutToUse)) {
                LOG.info("Update " + nodeStr + " timeout to be " + timeoutToUse);
                decommissionTargets.add(node);
            } else {
                LOG.info("No action for " + nodeStr);
            }
        } else {
            LOG.info("Gracefully decommission " + nodeStr);
            decommissionTargets.add(node);
        }
    }

    for (RMNode node : recommissionTargets) {
        RMNodeEvent recommission = new RMNodeEvent(node.getNodeID(), RMNodeEventType.RECOMMISSION);
        this.rmContext.getDispatcher().getEventHandler().handle(recommission);
    }

    for (RMNode node : decommissionTargets) {
        RMNodeEvent decommission;
        if (graceful) {
            Integer perNodeTimeout = excludedHosts.get(node.getHostName());
            Integer timeoutToUse = (perNodeTimeout != null) ? perNodeTimeout : timeout;
            decommission = new RMNodeDecommissioningEvent(node.getNodeID(), timeoutToUse);
        } else {
            // Forceful mode sends SHUTDOWN to nodes that isUntrackedNode()
            // reports as untracked, and DECOMMISSION to the rest.
            RMNodeEventType eventType;
            if (isUntrackedNode(node.getHostName())) {
                eventType = RMNodeEventType.SHUTDOWN;
            } else {
                eventType = RMNodeEventType.DECOMMISSION;
            }
            decommission = new RMNodeEvent(node.getNodeID(), eventType);
        }
        this.rmContext.getDispatcher().getEventHandler().handle(decommission);
    }

    updateInactiveNodes();
}
Also used : RMNodeDecommissioningEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecommissioningEvent) NodeState(org.apache.hadoop.yarn.api.records.NodeState) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) RMNodeEventType(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType) ArrayList(java.util.ArrayList) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent) HashSet(java.util.HashSet)

Example 3 with RMNodeEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in project hadoop by apache.

the class MockRM method sendNodeLost.

/**
 * Delivers an EXPIRE event directly to the RM-side node registered under
 * {@code nm}'s node id, then calls {@code drainEventsImplicitly()}.
 *
 * @param nm the mock node manager whose node should receive the EXPIRE event
 * @throws Exception declared by the signature; the body shows no specific source
 */
public void sendNodeLost(MockNM nm) throws Exception {
    RMNodeImpl trackedNode = (RMNodeImpl) getRMContext().getRMNodes().get(nm.getNodeId());
    RMNodeEvent expireEvent = new RMNodeEvent(nm.getNodeId(), RMNodeEventType.EXPIRE);
    // Handed straight to the node rather than going through the dispatcher.
    trackedNode.handle(expireEvent);
    drainEventsImplicitly();
}
Also used : RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)

Example 4 with RMNodeEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in project hadoop by apache.

the class ResourceTrackerService method unRegisterNodeManager.

/**
 * Handles an unregister request from a node manager.
 *
 * If the node id is not present in {@code rmContext.getRMNodes()}, the request
 * is logged and ignored. Otherwise the node is removed from the liveliness
 * monitor and a SHUTDOWN event for it is dispatched. The same empty response
 * record is returned in both cases.
 *
 * @param request the unregister request carrying the node id
 * @return a newly created {@link UnRegisterNodeManagerResponse}
 */
@SuppressWarnings("unchecked")
@Override
public UnRegisterNodeManagerResponse unRegisterNodeManager(UnRegisterNodeManagerRequest request) throws YarnException, IOException {
    final UnRegisterNodeManagerResponse reply = recordFactory.newRecordInstance(UnRegisterNodeManagerResponse.class);
    final NodeId nodeId = request.getNodeId();
    final boolean knownNode = this.rmContext.getRMNodes().get(nodeId) != null;
    if (knownNode) {
        LOG.info("Node with node id : " + nodeId + " has shutdown, hence unregistering the node.");
        // Unregister from the liveliness monitor before dispatching SHUTDOWN.
        this.nmLivelinessMonitor.unregister(nodeId);
        RMNodeEvent shutdownEvent = new RMNodeEvent(nodeId, RMNodeEventType.SHUTDOWN);
        this.rmContext.getDispatcher().getEventHandler().handle(shutdownEvent);
    } else {
        LOG.info("Node not found, ignoring the unregister from node id : " + nodeId);
    }
    return reply;
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeId(org.apache.hadoop.yarn.api.records.NodeId) UnRegisterNodeManagerResponse(org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)

Example 5 with RMNodeEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent in project hadoop by apache.

the class TestRMNodeTransitions method testRunningRebooting.

/**
 * Sends a REBOOTING event to the node obtained from {@code getRunningNode()}
 * and checks the cluster metrics against their values before the event:
 * active count drops by one, rebooted count rises by one, and the lost,
 * unhealthy and decommissioned counts stay the same. The node must end in
 * the REBOOTED state.
 */
@Test
public void testRunningRebooting() {
    RMNodeImpl runningNode = getRunningNode();
    ClusterMetrics metrics = ClusterMetrics.getMetrics();

    // Baseline counters captured before the event is delivered.
    int activeBefore = metrics.getNumActiveNMs();
    int lostBefore = metrics.getNumLostNMs();
    int unhealthyBefore = metrics.getUnhealthyNMs();
    int decommissionedBefore = metrics.getNumDecommisionedNMs();
    int rebootedBefore = metrics.getNumRebootedNMs();

    RMNodeEvent rebootEvent = new RMNodeEvent(runningNode.getNodeID(), RMNodeEventType.REBOOTING);
    runningNode.handle(rebootEvent);

    Assert.assertEquals("Active Nodes", activeBefore - 1, metrics.getNumActiveNMs());
    Assert.assertEquals("Lost Nodes", lostBefore, metrics.getNumLostNMs());
    Assert.assertEquals("Unhealthy Nodes", unhealthyBefore, metrics.getUnhealthyNMs());
    Assert.assertEquals("Decommissioned Nodes", decommissionedBefore, metrics.getNumDecommisionedNMs());
    Assert.assertEquals("Rebooted Nodes", rebootedBefore + 1, metrics.getNumRebootedNMs());
    Assert.assertEquals(NodeState.REBOOTED, runningNode.getState());
}
Also used : RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent) Test(org.junit.Test)

Aggregations

RMNodeEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent): 27, RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl): 20, Test (org.junit.Test): 18, NodeId (org.apache.hadoop.yarn.api.records.NodeId): 6, RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode): 4, Resource (org.apache.hadoop.yarn.api.records.Resource): 3, ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 2, ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 2, RMNodeEventType (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType): 2, RMNodeStartedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent): 2, RMNodeStatusEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent): 2, IOException (java.io.IOException): 1, ByteBuffer (java.nio.ByteBuffer): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1, Container (org.apache.hadoop.yarn.api.records.Container): 1, NodeState (org.apache.hadoop.yarn.api.records.NodeState): 1