Search in sources :

Example 11 with NodeState

use of org.apache.hadoop.yarn.api.records.NodeState in project hadoop by apache.

the class RMContainerAllocator method handleUpdatedNodes.

@SuppressWarnings("unchecked")
private void handleUpdatedNodes(AllocateResponse response) {
    // send event to the job about on updated nodes
    List<NodeReport> updatedNodes = response.getUpdatedNodes();
    if (!updatedNodes.isEmpty()) {
        // send event to the job to act upon completed tasks
        eventHandler.handle(new JobUpdatedNodesEvent(getJob().getID(), updatedNodes));
        // act upon running tasks
        HashSet<NodeId> unusableNodes = new HashSet<NodeId>();
        for (NodeReport nr : updatedNodes) {
            NodeState nodeState = nr.getNodeState();
            if (nodeState.isUnusable()) {
                unusableNodes.add(nr.getNodeId());
            }
        }
        for (int i = 0; i < 2; ++i) {
            HashMap<TaskAttemptId, Container> taskSet = i == 0 ? assignedRequests.maps : assignedRequests.reduces;
            // kill running containers
            for (Map.Entry<TaskAttemptId, Container> entry : taskSet.entrySet()) {
                TaskAttemptId tid = entry.getKey();
                NodeId taskAttemptNodeId = entry.getValue().getNodeId();
                if (unusableNodes.contains(taskAttemptNodeId)) {
                    LOG.info("Killing taskAttempt:" + tid + " because it is running on unusable node:" + taskAttemptNodeId);
                    // If map, reschedule next task attempt.
                    boolean rescheduleNextAttempt = (i == 0) ? true : false;
                    eventHandler.handle(new TaskAttemptKillEvent(tid, "TaskAttempt killed because it ran on unusable node" + taskAttemptNodeId, rescheduleNextAttempt));
                }
            }
        }
    }
}
Also used : NodeState(org.apache.hadoop.yarn.api.records.NodeState) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskAttemptKillEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent) JobUpdatedNodesEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobUpdatedNodesEvent) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) HashSet(java.util.HashSet)

Example 12 with NodeState

use of org.apache.hadoop.yarn.api.records.NodeState in project hadoop by apache.

the class YarnClientImpl method getNodeReports.

@Override
public List<NodeReport> getNodeReports(NodeState... states) throws YarnException, IOException {
    EnumSet<NodeState> statesSet = (states.length == 0) ? EnumSet.allOf(NodeState.class) : EnumSet.noneOf(NodeState.class);
    for (NodeState state : states) {
        statesSet.add(state);
    }
    GetClusterNodesRequest request = GetClusterNodesRequest.newInstance(statesSet);
    GetClusterNodesResponse response = rmClient.getClusterNodes(request);
    return response.getNodeReports();
}
Also used : NodeState(org.apache.hadoop.yarn.api.records.NodeState) GetClusterNodesResponse(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse) GetClusterNodesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)

Example 13 with NodeState

use of org.apache.hadoop.yarn.api.records.NodeState in project hadoop by apache.

the class NodeCLI method listClusterNodes.

/**
   * Lists the nodes matching the given node states
   * 
   * @param nodeStates
   * @throws YarnException
   * @throws IOException
   */
private void listClusterNodes(Set<NodeState> nodeStates) throws YarnException, IOException {
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, Charset.forName("UTF-8")));
    List<NodeReport> nodesReport = client.getNodeReports(nodeStates.toArray(new NodeState[0]));
    writer.println("Total Nodes:" + nodesReport.size());
    writer.printf(NODES_PATTERN, "Node-Id", "Node-State", "Node-Http-Address", "Number-of-Running-Containers");
    for (NodeReport nodeReport : nodesReport) {
        writer.printf(NODES_PATTERN, nodeReport.getNodeId(), nodeReport.getNodeState(), nodeReport.getHttpAddress(), nodeReport.getNumContainers());
    }
    writer.flush();
}
Also used : NodeState(org.apache.hadoop.yarn.api.records.NodeState) OutputStreamWriter(java.io.OutputStreamWriter) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) PrintWriter(java.io.PrintWriter)

Example 14 with NodeState

use of org.apache.hadoop.yarn.api.records.NodeState in project hadoop by apache.

the class NodeCLI method getAllValidNodeStates.

private String getAllValidNodeStates() {
    StringBuilder sb = new StringBuilder();
    sb.append("The valid node state can be one of the following: ");
    for (NodeState state : NodeState.values()) {
        sb.append(state).append(",");
    }
    String output = sb.toString();
    return output.substring(0, output.length() - 1) + ".";
}
Also used : NodeState(org.apache.hadoop.yarn.api.records.NodeState)

Example 15 with NodeState

use of org.apache.hadoop.yarn.api.records.NodeState in project hadoop by apache.

the class TestYarnCLI method testListClusterNodes.

@Test
public void testListClusterNodes() throws Exception {
    List<NodeReport> nodeReports = new ArrayList<NodeReport>();
    nodeReports.addAll(getNodeReports(1, NodeState.NEW));
    nodeReports.addAll(getNodeReports(2, NodeState.RUNNING));
    nodeReports.addAll(getNodeReports(1, NodeState.UNHEALTHY));
    nodeReports.addAll(getNodeReports(1, NodeState.DECOMMISSIONED));
    nodeReports.addAll(getNodeReports(1, NodeState.REBOOTED));
    nodeReports.addAll(getNodeReports(1, NodeState.LOST));
    NodeCLI cli = new NodeCLI();
    cli.setClient(client);
    cli.setSysOutPrintStream(sysOut);
    Set<NodeState> nodeStates = new HashSet<NodeState>();
    nodeStates.add(NodeState.NEW);
    NodeState[] states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    int result = cli.run(new String[] { "-list", "-states", "NEW" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintWriter pw = new PrintWriter(baos);
    pw.println("Total Nodes:1");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t            NEW\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    String nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(1)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.RUNNING);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "RUNNING" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:2");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host1:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(2)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    result = cli.run(new String[] { "-list" });
    assertEquals(0, result);
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(3)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    result = cli.run(new String[] { "-list", "-showDetails" });
    assertEquals(0, result);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:2");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.println("Detailed Node Information :");
    pw.println("\tConfigured Resources : <memory:0, vCores:0>");
    pw.println("\tAllocated Resources : <memory:0, vCores:0>");
    pw.println("\tResource Utilization by Node : PMem:2048 MB, VMem:4096 MB, VCores:8.0");
    pw.println("\tResource Utilization by Containers : PMem:1024 MB, VMem:2048 MB, VCores:4.0");
    pw.println("\tNode-Labels : ");
    pw.print("         host1:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.println("Detailed Node Information :");
    pw.println("\tConfigured Resources : <memory:0, vCores:0>");
    pw.println("\tAllocated Resources : <memory:0, vCores:0>");
    pw.println("\tResource Utilization by Node : PMem:2048 MB, VMem:4096 MB, VCores:8.0");
    pw.println("\tResource Utilization by Containers : PMem:1024 MB, VMem:2048 MB, VCores:4.0");
    pw.println("\tNode-Labels : ");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(4)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.UNHEALTHY);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "UNHEALTHY" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:1");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t      UNHEALTHY\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(5)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.DECOMMISSIONED);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "DECOMMISSIONED" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:1");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t DECOMMISSIONED\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(6)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.REBOOTED);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "REBOOTED" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:1");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t       REBOOTED\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(7)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.LOST);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "LOST" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:1");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t           LOST\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(8)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    nodeStates.add(NodeState.NEW);
    nodeStates.add(NodeState.RUNNING);
    nodeStates.add(NodeState.LOST);
    nodeStates.add(NodeState.REBOOTED);
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-states", "NEW,RUNNING,LOST,REBOOTED" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:5");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t            NEW\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host1:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t       REBOOTED\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t           LOST\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(9)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    nodeStates.clear();
    for (NodeState s : NodeState.values()) {
        nodeStates.add(s);
    }
    states = nodeStates.toArray(new NodeState[0]);
    when(client.getNodeReports(states)).thenReturn(getNodeReports(nodeReports, nodeStates));
    result = cli.run(new String[] { "-list", "-All" });
    assertEquals(0, result);
    verify(client).getNodeReports(states);
    baos = new ByteArrayOutputStream();
    pw = new PrintWriter(baos);
    pw.println("Total Nodes:7");
    pw.print("         Node-Id\t     Node-State\tNode-Http-Address\t");
    pw.println("Number-of-Running-Containers");
    pw.print("         host0:0\t            NEW\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host1:0\t        RUNNING\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t      UNHEALTHY\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t DECOMMISSIONED\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t       REBOOTED\t       host1:8888\t");
    pw.println("                           0");
    pw.print("         host0:0\t           LOST\t       host1:8888\t");
    pw.println("                           0");
    pw.close();
    nodesReportStr = baos.toString("UTF-8");
    Assert.assertEquals(nodesReportStr, sysOutStream.toString());
    verify(sysOut, times(10)).write(any(byte[].class), anyInt(), anyInt());
    sysOutStream.reset();
    result = cli.run(new String[] { "-list", "-states", "InvalidState" });
    assertEquals(-1, result);
}
Also used : NodeState(org.apache.hadoop.yarn.api.records.NodeState) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

NodeState (org.apache.hadoop.yarn.api.records.NodeState)15 NodeReport (org.apache.hadoop.yarn.api.records.NodeReport)6 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)4 PrintWriter (java.io.PrintWriter)3 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)3 OutputStreamWriter (java.io.OutputStreamWriter)2 HashMap (java.util.HashMap)2 GetClusterNodesResponse (org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse)2 Test (org.junit.Test)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 GET (javax.ws.rs.GET)1 Path (javax.ws.rs.Path)1 Produces (javax.ws.rs.Produces)1 CommandLine (org.apache.commons.cli.CommandLine)1 GnuParser (org.apache.commons.cli.GnuParser)1