Search in sources :

Example 1 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class ResourceTrackerService method registerNodeManager.

@SuppressWarnings("unchecked")
@Override
public RegisterNodeManagerResponse registerNodeManager(RegisterNodeManagerRequest request) throws YarnException, IOException {
    NodeId nodeId = request.getNodeId();
    String host = nodeId.getHost();
    int cmPort = nodeId.getPort();
    int httpPort = request.getHttpPort();
    Resource capability = request.getResource();
    String nodeManagerVersion = request.getNMVersion();
    Resource physicalResource = request.getPhysicalResource();
    RegisterNodeManagerResponse response = recordFactory.newRecordInstance(RegisterNodeManagerResponse.class);
    if (!minimumNodeManagerVersion.equals("NONE")) {
        if (minimumNodeManagerVersion.equals("EqualToRM")) {
            minimumNodeManagerVersion = YarnVersionInfo.getVersion();
        }
        if ((nodeManagerVersion == null) || (VersionUtil.compareVersions(nodeManagerVersion, minimumNodeManagerVersion)) < 0) {
            String message = "Disallowed NodeManager Version " + nodeManagerVersion + ", is less than the minimum version " + minimumNodeManagerVersion + " sending SHUTDOWN signal to " + "NodeManager.";
            LOG.info(message);
            response.setDiagnosticsMessage(message);
            response.setNodeAction(NodeAction.SHUTDOWN);
            return response;
        }
    }
    // Check if this node is a 'valid' node
    if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) {
        String message = "Disallowed NodeManager from  " + host + ", Sending SHUTDOWN signal to the NodeManager.";
        LOG.info(message);
        response.setDiagnosticsMessage(message);
        response.setNodeAction(NodeAction.SHUTDOWN);
        return response;
    }
    // check if node's capacity is load from dynamic-resources.xml
    String nid = nodeId.toString();
    Resource dynamicLoadCapability = loadNodeResourceFromDRConfiguration(nid);
    if (dynamicLoadCapability != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Resource for node: " + nid + " is adjusted from: " + capability + " to: " + dynamicLoadCapability + " due to settings in dynamic-resources.xml.");
        }
        capability = dynamicLoadCapability;
        // sync back with new resource.
        response.setResource(capability);
    }
    // Check if this node has minimum allocations
    if (capability.getMemorySize() < minAllocMb || capability.getVirtualCores() < minAllocVcores) {
        String message = "NodeManager from  " + host + " doesn't satisfy minimum allocations, Sending SHUTDOWN" + " signal to the NodeManager.";
        LOG.info(message);
        response.setDiagnosticsMessage(message);
        response.setNodeAction(NodeAction.SHUTDOWN);
        return response;
    }
    response.setContainerTokenMasterKey(containerTokenSecretManager.getCurrentKey());
    response.setNMTokenMasterKey(nmTokenSecretManager.getCurrentKey());
    RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort, resolve(host), capability, nodeManagerVersion, physicalResource);
    RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
    if (oldNode == null) {
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeStartedEvent(nodeId, request.getNMContainerStatuses(), request.getRunningApplications()));
    } else {
        LOG.info("Reconnect from the node at: " + host);
        this.nmLivelinessMonitor.unregister(nodeId);
        // Reset heartbeat ID since node just restarted.
        oldNode.resetLastNodeHeartBeatResponse();
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeReconnectEvent(nodeId, rmNode, request.getRunningApplications(), request.getNMContainerStatuses()));
    }
    // On every node manager register we will be clearing NMToken keys if
    // present for any running application.
    this.nmTokenSecretManager.removeNodeKey(nodeId);
    this.nmLivelinessMonitor.register(nodeId);
    // RMNode inserted
    if (!rmContext.isWorkPreservingRecoveryEnabled()) {
        if (!request.getNMContainerStatuses().isEmpty()) {
            LOG.info("received container statuses on node manager register :" + request.getNMContainerStatuses());
            for (NMContainerStatus status : request.getNMContainerStatuses()) {
                handleNMContainerStatus(status, nodeId);
            }
        }
    }
    // Update node's labels to RM's NodeLabelManager.
    Set<String> nodeLabels = NodeLabelsUtils.convertToStringSet(request.getNodeLabels());
    if (isDistributedNodeLabelsConf && nodeLabels != null) {
        try {
            updateNodeLabelsFromNMReport(nodeLabels, nodeId);
            response.setAreNodeLabelsAcceptedByRM(true);
        } catch (IOException ex) {
            // Ensure the exception is captured in the response
            response.setDiagnosticsMessage(ex.getMessage());
            response.setAreNodeLabelsAcceptedByRM(false);
        }
    } else if (isDelegatedCentralizedNodeLabelsConf) {
        this.rmContext.getRMDelegatedNodeLabelsUpdater().updateNodeLabels(nodeId);
    }
    StringBuilder message = new StringBuilder();
    message.append("NodeManager from node ").append(host).append("(cmPort: ").append(cmPort).append(" httpPort: ");
    message.append(httpPort).append(") ").append("registered with capability: ").append(capability);
    message.append(", assigned nodeId ").append(nodeId);
    if (response.getAreNodeLabelsAcceptedByRM()) {
        message.append(", node labels { ").append(StringUtils.join(",", nodeLabels) + " } ");
    }
    LOG.info(message.toString());
    response.setNodeAction(NodeAction.NORMAL);
    response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
    response.setRMVersion(YarnVersionInfo.getVersion());
    return response;
}
Also used : RMNodeReconnectEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeReconnectEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) IOException(java.io.IOException) RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RegisterNodeManagerResponse(org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse) UnRegisterNodeManagerResponse(org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)

Example 2 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method testForHandlingDuplicatedCompltedContainers.

@Test
public void testForHandlingDuplicatedCompltedContainers() {
    // Start the node
    node.handle(new RMNodeStartedEvent(null, null, null));
    // Add info to the queue first
    node.setNextHeartBeat(false);
    ContainerId completedContainerId1 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
    RMNodeStatusEvent statusEvent1 = getMockRMNodeStatusEvent(null);
    ContainerStatus containerStatus1 = mock(ContainerStatus.class);
    doReturn(completedContainerId1).when(containerStatus1).getContainerId();
    doReturn(Collections.singletonList(containerStatus1)).when(statusEvent1).getContainers();
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    node.handle(statusEvent1);
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    Assert.assertEquals(1, node.getQueueSize());
    Assert.assertEquals(1, node.getCompletedContainers().size());
    // test for duplicate entries
    node.handle(statusEvent1);
    Assert.assertEquals(1, node.getQueueSize());
    // send clean up container event
    node.handle(new RMNodeFinishedContainersPulledByAMEvent(node.getNodeID(), Collections.singletonList(completedContainerId1)));
    NodeHeartbeatResponse hbrsp = Records.newRecord(NodeHeartbeatResponse.class);
    node.updateNodeHeartbeatResponseForCleanup(hbrsp);
    Assert.assertEquals(1, hbrsp.getContainersToBeRemovedFromNM().size());
    Assert.assertEquals(0, node.getCompletedContainers().size());
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMNodeFinishedContainersPulledByAMEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent) Test(org.junit.Test)

Example 3 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method testStatusChange.

@Test(timeout = 5000)
public void testStatusChange() {
    //Start the node
    node.handle(new RMNodeStartedEvent(null, null, null));
    //Add info to the queue first
    node.setNextHeartBeat(false);
    ContainerId completedContainerId1 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(0, 0), 0), 0);
    ContainerId completedContainerId2 = BuilderUtils.newContainerId(BuilderUtils.newApplicationAttemptId(BuilderUtils.newApplicationId(1, 1), 1), 1);
    RMNodeStatusEvent statusEvent1 = getMockRMNodeStatusEvent(null);
    RMNodeStatusEvent statusEvent2 = getMockRMNodeStatusEvent(null);
    ContainerStatus containerStatus1 = mock(ContainerStatus.class);
    ContainerStatus containerStatus2 = mock(ContainerStatus.class);
    doReturn(completedContainerId1).when(containerStatus1).getContainerId();
    doReturn(Collections.singletonList(containerStatus1)).when(statusEvent1).getContainers();
    doReturn(completedContainerId2).when(containerStatus2).getContainerId();
    doReturn(Collections.singletonList(containerStatus2)).when(statusEvent2).getContainers();
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    node.handle(statusEvent1);
    node.handle(statusEvent2);
    verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class));
    Assert.assertEquals(2, node.getQueueSize());
    node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
    Assert.assertEquals(0, node.getQueueSize());
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMNodeStatusEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent) Test(org.junit.Test)

Example 4 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method getRunningNode.

private RMNodeImpl getRunningNode(String nmVersion, int port) {
    NodeId nodeId = BuilderUtils.newNodeId("localhost", port);
    Resource capability = Resource.newInstance(4096, 4);
    RMNodeImpl node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, capability, nmVersion);
    node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null));
    Assert.assertEquals(NodeState.RUNNING, node.getState());
    return node;
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)

Example 5 with RMNodeStartedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent in project hadoop by apache.

the class TestRMNodeTransitions method testAdd.

@Test
public void testAdd() {
    RMNodeImpl node = getNewNode();
    ClusterMetrics cm = ClusterMetrics.getMetrics();
    int initialActive = cm.getNumActiveNMs();
    int initialLost = cm.getNumLostNMs();
    int initialUnhealthy = cm.getUnhealthyNMs();
    int initialDecommissioned = cm.getNumDecommisionedNMs();
    int initialRebooted = cm.getNumRebootedNMs();
    node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null));
    Assert.assertEquals("Active Nodes", initialActive + 1, cm.getNumActiveNMs());
    Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
    Assert.assertEquals("Unhealthy Nodes", initialUnhealthy, cm.getUnhealthyNMs());
    Assert.assertEquals("Decommissioned Nodes", initialDecommissioned, cm.getNumDecommisionedNMs());
    Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs());
    Assert.assertEquals(NodeState.RUNNING, node.getState());
    Assert.assertNotNull(nodesListManagerEvent);
    Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType());
}
Also used : RMNodeStartedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Aggregations

RMNodeStartedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent)10 RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)7 Test (org.junit.Test)6 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)5 NodeId (org.apache.hadoop.yarn.api.records.NodeId)5 RMNodeStatusEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent)5 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)4 Resource (org.apache.hadoop.yarn.api.records.Resource)4 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)3 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)2 RMNodeEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)1 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)1 LogAggregationReport (org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport)1 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)1 NodeHeartbeatResponse (org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse)1 RegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse)1 UnRegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse)1