Search in sources :

Example 16 with NodeStatus

use of org.apache.hadoop.yarn.server.api.records.NodeStatus in project hadoop by apache.

the class TestResourceTrackerOnHA method testResourceTrackerOnHA.

/**
 * Exercises the resource tracker across failover: first a NodeManager
 * registration, then (after starting a new failover thread) a node heartbeat.
 */
@Test(timeout = 15000)
public void testResourceTrackerOnHA() throws Exception {
    final NodeId localNode = NodeId.newInstance("localhost", 0);
    final Resource capability = Resource.newInstance(2048, 4);

    // Registration has to go through when a failover happens.
    RegisterNodeManagerRequest registration = RegisterNodeManagerRequest.newInstance(
        localNode, 0, capability, YarnVersionInfo.getVersion(), null, null);
    resourceTracker.registerNodeManager(registration);
    Assert.assertTrue(waitForNodeManagerToConnect(10000, localNode));

    // Restart the failover thread, then check that a heartbeat goes through as well.
    failoverThread = createAndStartFailoverThread();
    NodeId heartbeatNode = NodeId.newInstance("localhost", 0);
    NodeStatus heartbeatStatus = NodeStatus.newInstance(
        heartbeatNode, 0, null, null, null, null, null, null);
    resourceTracker.nodeHeartbeat(
        NodeHeartbeatRequest.newInstance(heartbeatStatus, null, null, null));
}
Also used : RegisterNodeManagerRequest(org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) NodeHeartbeatRequest(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus) Test(org.junit.Test)

Example 17 with NodeStatus

use of org.apache.hadoop.yarn.server.api.records.NodeStatus in project hadoop by apache.

the class TestProtocolRecords method testNodeHeartBeatRequest.

/**
 * Checks that the opportunistic-containers status set on a heartbeat request
 * is still readable after rebuilding the request from its protobuf form.
 */
@Test
public void testNodeHeartBeatRequest() throws IOException {
    NodeHeartbeatRequest heartbeat = Records.newRecord(NodeHeartbeatRequest.class);
    NodeStatus status = Records.newRecord(NodeStatus.class);

    // Populate the queue statistics with two distinct, recognisable values.
    OpportunisticContainersStatus queueStats = Records.newRecord(OpportunisticContainersStatus.class);
    queueStats.setEstimatedQueueWaitTime(123);
    queueStats.setWaitQueueLength(321);
    status.setOpportunisticContainersStatus(queueStats);
    heartbeat.setNodeStatus(status);

    // Rebuild the request from the proto of the original record.
    NodeHeartbeatRequestPBImpl original = (NodeHeartbeatRequestPBImpl) heartbeat;
    NodeHeartbeatRequestPBImpl rebuilt = new NodeHeartbeatRequestPBImpl(original.getProto());

    OpportunisticContainersStatus rebuiltStats = rebuilt.getNodeStatus().getOpportunisticContainersStatus();
    Assert.assertEquals(123, rebuiltStats.getEstimatedQueueWaitTime());
    Assert.assertEquals(321, rebuiltStats.getWaitQueueLength());
}
Also used : OpportunisticContainersStatus(org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus) NodeHeartbeatRequestPBImpl(org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.NodeHeartbeatRequestPBImpl) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus) Test(org.junit.Test)

Example 18 with NodeStatus

use of org.apache.hadoop.yarn.server.api.records.NodeStatus in project hadoop by apache.

the class NodeStatusUpdaterImpl method getNodeStatus.

/**
 * Assembles the {@link NodeStatus} for the given response id.
 *
 * <p>The health status obtained from the context is updated from the health
 * checker, then combined with container statuses, utilization figures, the
 * increased-containers list and the keep-alive application list. The
 * opportunistic-containers status is set on the result before it is returned.
 *
 * @param responseId the response id passed through to the new status
 * @return the populated node status
 * @throws IOException declared by the signature; may propagate from the helpers called here
 */
@VisibleForTesting
protected NodeStatus getNodeStatus(int responseId) throws IOException {
    // Update the health status with the health checker's current values.
    NodeHealthStatus health = this.context.getNodeHealthStatus();
    health.setHealthReport(healthChecker.getHealthReport());
    health.setIsNodeHealthy(healthChecker.isHealthy());
    health.setLastHealthReportTime(healthChecker.getLastHealthReportTime());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Node's health-status : " + health.getIsNodeHealthy() + ", " + health.getHealthReport());
    }

    // Gather the remaining inputs; the call order matches the original sequence.
    List<ContainerStatus> containerReports = getContainerStatuses();
    ResourceUtilization containersLoad = getContainersUtilization();
    ResourceUtilization nodeLoad = getNodeUtilization();
    List<org.apache.hadoop.yarn.api.records.Container> grownContainers = getIncreasedContainers();

    NodeStatus status = NodeStatus.newInstance(
        nodeId,
        responseId,
        containerReports,
        createKeepAliveApplicationList(),
        health,
        containersLoad,
        nodeLoad,
        grownContainers);
    status.setOpportunisticContainersStatus(getOpportunisticContainersStatus());
    return status;
}
Also used : NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ResourceUtilization(org.apache.hadoop.yarn.api.records.ResourceUtilization) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus) NodeHealthStatus(org.apache.hadoop.yarn.server.api.records.NodeHealthStatus) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 19 with NodeStatus

use of org.apache.hadoop.yarn.server.api.records.NodeStatus in project hadoop by apache.

the class ResourceTrackerService method nodeHeartbeat.

/**
 * Processes one heartbeat from a NodeManager and builds the response for it.
 *
 * <p>The method returns early with a SHUTDOWN or RESYNC action, or with the
 * previously stored response, when the node is disallowed, unknown, sends a
 * duplicate heartbeat, is too far behind, or is ready to be decommissioned.
 * Otherwise it builds a NORMAL response, dispatches an
 * {@code RMNodeStatusEvent} carrying the reported status, and decorates the
 * response with credentials, collector info, node-label acceptance, a
 * resource override and a container queuing limit where applicable.
 *
 * @param request the heartbeat request; its {@code NodeStatus} supplies the
 *                node id and the response id used for the checks below
 * @return the heartbeat response to send back to the node
 * @throws YarnException declared by the signature; not thrown explicitly in
 *                       this body, but may propagate from the helpers called
 * @throws IOException declared by the signature; the IOException from the
 *                     node-label update is caught and reported via the response
 */
@SuppressWarnings("unchecked")
@Override
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnException, IOException {
    NodeStatus remoteNodeStatus = request.getNodeStatus();
    /*
     * Here is the node heartbeat sequence...
     * 1. Check if it's a valid (i.e. not excluded) node
     * 2. Check if it's a registered node
     * 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
     * 4. Send healthStatus to RMNode
     * 5. Update node's labels if distributed Node Labels configuration is enabled
     * 6. Send back a resource loaded from the dynamic-resources configuration, if any
     * 7. Send back the container queuing limit, if a calculator is configured
     */
    NodeId nodeId = remoteNodeStatus.getNodeId();
    // 1. Check if it's a valid (i.e. not excluded) node; if not, see if it is
    // in decommissioning.
    if (!this.nodesListManager.isValidNode(nodeId.getHost()) && !isNodeInDecommissioning(nodeId)) {
        String message = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId.getHost();
        LOG.info(message);
        return YarnServerBuilderUtils.newNodeHeartbeatResponse(NodeAction.SHUTDOWN, message);
    }
    // 2. Check if it's a registered node
    RMNode rmNode = this.rmContext.getRMNodes().get(nodeId);
    if (rmNode == null) {
        /* node does not exist */
        String message = "Node not found resyncing " + remoteNodeStatus.getNodeId();
        LOG.info(message);
        return YarnServerBuilderUtils.newNodeHeartbeatResponse(NodeAction.RESYNC, message);
    }
    // Send ping
    this.nmLivelinessMonitor.receivedPing(nodeId);
    // Pass the reported status to the decommissioning watcher as well.
    this.decommissioningWatcher.update(rmNode, remoteNodeStatus);
    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.getLastNodeHeartBeatResponse();
    // The node's id being exactly one behind the last response id is treated as a
    // duplicate: the stored response is returned again unchanged.
    if (remoteNodeStatus.getResponseId() + 1 == lastNodeHeartbeatResponse.getResponseId()) {
        LOG.info("Received duplicate heartbeat from node " + rmNode.getNodeAddress() + " responseId=" + remoteNodeStatus.getResponseId());
        return lastNodeHeartbeatResponse;
    } else if (remoteNodeStatus.getResponseId() + 1 < lastNodeHeartbeatResponse.getResponseId()) {
        // More than one behind: fire a REBOOTING event for the node and ask it to resync.
        String message = "Too far behind rm response id:" + lastNodeHeartbeatResponse.getResponseId() + " nm response id:" + remoteNodeStatus.getResponseId();
        LOG.info(message);
        // TODO: Just sending reboot is not enough. Think more.
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING));
        return YarnServerBuilderUtils.newNodeHeartbeatResponse(NodeAction.RESYNC, message);
    }
    // Read the timeline-service-v2 flag once; it is used again when filling the response.
    boolean timelineV2Enabled = YarnConfiguration.timelineServiceV2Enabled(getConfig());
    if (timelineV2Enabled) {
        // Check & update collectors info from request.
        // TODO make sure it won't have race condition issue for AM failed over
        // case that the older registration could possible override the newer
        // one.
        updateAppCollectorsMap(request);
    }
    // Evaluate whether a DECOMMISSIONING node is ready to be DECOMMISSIONED.
    if (rmNode.getState() == NodeState.DECOMMISSIONING && decommissioningWatcher.checkReadyToBeDecommissioned(rmNode.getNodeID())) {
        String message = "DECOMMISSIONING " + nodeId + " is ready to be decommissioned";
        LOG.info(message);
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION));
        this.nmLivelinessMonitor.unregister(nodeId);
        return YarnServerBuilderUtils.newNodeHeartbeatResponse(NodeAction.SHUTDOWN, message);
    }
    // Heartbeat response: NORMAL action, response id one past the last response's id.
    NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils.newNodeHeartbeatResponse(lastNodeHeartbeatResponse.getResponseId() + 1, NodeAction.NORMAL, null, null, null, null, nextHeartBeatInterval);
    rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
    rmNode.updateNodeHeartbeatResponseForContainersDecreasing(nodeHeartBeatResponse);
    populateKeys(request, nodeHeartBeatResponse);
    // Attach system credentials only when there are any.
    ConcurrentMap<ApplicationId, ByteBuffer> systemCredentials = rmContext.getSystemCredentialsForApps();
    if (!systemCredentials.isEmpty()) {
        nodeHeartBeatResponse.setSystemCredentialsForApps(systemCredentials);
    }
    if (timelineV2Enabled) {
        // Return collectors' map that NM needs to know
        setAppCollectorsMapToResponse(rmNode.getRunningApps(), nodeHeartBeatResponse);
    }
    // 4. Send status to RMNode, saving the latest response.
    RMNodeStatusEvent nodeStatusEvent = new RMNodeStatusEvent(nodeId, remoteNodeStatus, nodeHeartBeatResponse);
    // Forward log aggregation reports only when the request carries a non-empty set.
    if (request.getLogAggregationReportsForApps() != null && !request.getLogAggregationReportsForApps().isEmpty()) {
        nodeStatusEvent.setLogAggregationReportsForApps(request.getLogAggregationReportsForApps());
    }
    this.rmContext.getDispatcher().getEventHandler().handle(nodeStatusEvent);
    // 5. Update node's labels to RM's NodeLabelManager.
    if (isDistributedNodeLabelsConf && request.getNodeLabels() != null) {
        try {
            updateNodeLabelsFromNMReport(NodeLabelsUtils.convertToStringSet(request.getNodeLabels()), nodeId);
            nodeHeartBeatResponse.setAreNodeLabelsAcceptedByRM(true);
        } catch (IOException ex) {
            //ensure the error message is captured and sent across in response
            nodeHeartBeatResponse.setDiagnosticsMessage(ex.getMessage());
            nodeHeartBeatResponse.setAreNodeLabelsAcceptedByRM(false);
        }
    }
    // 6. check if node's capacity is load from dynamic-resources.xml
    // if so, send updated resource back to NM.
    String nid = nodeId.toString();
    Resource capability = loadNodeResourceFromDRConfiguration(nid);
    // sync back with new resource if not null.
    if (capability != null) {
        nodeHeartBeatResponse.setResource(capability);
    }
    // 7. Send the container queuing limit back to the node; it is used by
    // the node to truncate the number of Containers queued for execution.
    if (this.rmContext.getNodeManagerQueueLimitCalculator() != null) {
        nodeHeartBeatResponse.setContainerQueuingLimit(this.rmContext.getNodeManagerQueueLimitCalculator().createContainerQueuingLimit());
    }
    return nodeHeartBeatResponse;
}
Also used : NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent) NodeStatus(org.apache.hadoop.yarn.server.api.records.NodeStatus)

Aggregations

NodeStatus (org.apache.hadoop.yarn.server.api.records.NodeStatus) 19, ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus) 10, NodeHealthStatus (org.apache.hadoop.yarn.server.api.records.NodeHealthStatus) 9, Test (org.junit.Test) 9, RMNodeStatusEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) 8, NodeId (org.apache.hadoop.yarn.api.records.NodeId) 7, RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) 7, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 6, NodeHeartbeatResponse (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) 6, ArrayList (java.util.ArrayList) 5, Resource (org.apache.hadoop.yarn.api.records.Resource) 5, NodeHeartbeatRequest (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest) 5, ContainerId (org.apache.hadoop.yarn.api.records.ContainerId) 4, ResourceUtilization (org.apache.hadoop.yarn.api.records.ResourceUtilization) 3, YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 3, NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) 3, RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) 3, RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) 3, ClientResponse (com.sun.jersey.api.client.ClientResponse) 2, WebResource (com.sun.jersey.api.client.WebResource) 2