
Example 66 with NodeId

use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.

the class AdminService method refreshNodesResources.

@Override
public RefreshNodesResourcesResponse refreshNodesResources(RefreshNodesResourcesRequest request) throws YarnException, StandbyException {
    final String operation = "refreshNodesResources";
    UserGroupInformation user = checkAcls(operation);
    final String msg = "refresh nodes.";
    checkRMStatus(user.getShortUserName(), operation, msg);
    RefreshNodesResourcesResponse response = recordFactory.newRecordInstance(RefreshNodesResourcesResponse.class);
    try {
        Configuration conf = getConfig();
        Configuration configuration = new Configuration(conf);
        DynamicResourceConfiguration newConf;
        InputStream drInputStream = this.rmContext.getConfigurationProvider().getConfigurationInputStream(configuration, YarnConfiguration.DR_CONFIGURATION_FILE);
        if (drInputStream != null) {
            newConf = new DynamicResourceConfiguration(configuration, drInputStream);
        } else {
            newConf = new DynamicResourceConfiguration(configuration);
        }
        if (newConf.getNodes() != null && newConf.getNodes().length != 0) {
            Map<NodeId, ResourceOption> nodeResourceMap = newConf.getNodeResourceMap();
            UpdateNodeResourceRequest updateRequest = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
            updateNodeResource(updateRequest);
        }
        // refresh dynamic resource in ResourceTrackerService
        this.rmContext.getResourceTrackerService().updateDynamicResourceConfiguration(newConf);
        RMAuditLogger.logSuccess(user.getShortUserName(), operation, "AdminService");
        return response;
    } catch (IOException ioe) {
        throw logAndWrapException(ioe, user.getShortUserName(), operation, msg);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DynamicResourceConfiguration (org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), ResourceOption (org.apache.hadoop.yarn.api.records.ResourceOption), InputStream (java.io.InputStream), NodeId (org.apache.hadoop.yarn.api.records.NodeId), IOException (java.io.IOException), RefreshNodesResourcesResponse (org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation), UpdateNodeResourceRequest (org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest)
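
For context, a minimal sketch of how the node-resource map consumed by UpdateNodeResourceRequest can be assembled by hand; the host name, port, and resource values here are made up for illustration:

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;

public class NodeResourceMapSketch {
    public static UpdateNodeResourceRequest buildRequest() {
        // Hypothetical NodeManager address and new capacity.
        NodeId nodeId = NodeId.newInstance("worker-1.example.com", 45454);
        Resource newCapacity = Resource.newInstance(8192, 8); // 8192 MB, 8 vcores
        // -1 keeps the default over-commit timeout.
        ResourceOption option = ResourceOption.newInstance(newCapacity, -1);
        Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<>();
        nodeResourceMap.put(nodeId, option);
        return UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    }
}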

Example 67 with NodeId

use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.

the class NodesListManager method setDecomissionedNMs.

private void setDecomissionedNMs() {
    Set<String> excludeList = hostsReader.getExcludedHosts();
    for (final String host : excludeList) {
        NodeId nodeId = createUnknownNodeId(host);
        RMNodeImpl rmNode = new RMNodeImpl(nodeId, rmContext, host, -1, -1, new UnknownNode(host), Resource.newInstance(0, 0), "unknown");
        rmContext.getInactiveRMNodes().put(nodeId, rmNode);
        rmNode.handle(new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION));
    }
}
Also used: NodeId (org.apache.hadoop.yarn.api.records.NodeId), RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl), RMNodeEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent)
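
createUnknownNodeId is not shown above; a minimal sketch of what such a helper can look like, assuming the same placeholder port (-1) that the RMNodeImpl constructor receives for hosts that never registered:

import org.apache.hadoop.yarn.api.records.NodeId;

// Sketch only: build a NodeId for an excluded host that has never registered,
// using -1 as a placeholder port (an assumption, not the verified
// implementation of createUnknownNodeId).
public static NodeId createUnknownNodeId(String host) {
    return NodeId.newInstance(host, -1);
}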

Example 68 with NodeId

use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.

the class NodesListManager method serviceInit.

@Override
protected void serviceInit(Configuration conf) throws Exception {
    this.conf = conf;
    int nodeIpCacheTimeout = conf.getInt(YarnConfiguration.RM_NODE_IP_CACHE_EXPIRY_INTERVAL_SECS, YarnConfiguration.DEFAULT_RM_NODE_IP_CACHE_EXPIRY_INTERVAL_SECS);
    if (nodeIpCacheTimeout <= 0) {
        resolver = new DirectResolver();
    } else {
        resolver = new CachedResolver(SystemClock.getInstance(), nodeIpCacheTimeout);
        addIfService(resolver);
    }
    // Read the hosts/exclude files to restrict access to the RM
    try {
        this.includesFile = conf.get(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, YarnConfiguration.DEFAULT_RM_NODES_INCLUDE_FILE_PATH);
        this.excludesFile = conf.get(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH);
        this.hostsReader = createHostsFileReader(this.includesFile, this.excludesFile);
        setDecomissionedNMs();
        printConfiguredHosts();
    } catch (YarnException ex) {
        disableHostsFileReader(ex);
    } catch (IOException ioe) {
        disableHostsFileReader(ioe);
    }
    final int nodeRemovalTimeout = conf.getInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, YarnConfiguration.DEFAULT_RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC);
    nodeRemovalCheckInterval = (Math.min(nodeRemovalTimeout / 2, 600000));
    removalTimer = new Timer("Node Removal Timer");
    removalTimer.schedule(new TimerTask() {

        @Override
        public void run() {
            long now = Time.monotonicNow();
            for (Map.Entry<NodeId, RMNode> entry : rmContext.getInactiveRMNodes().entrySet()) {
                NodeId nodeId = entry.getKey();
                RMNode rmNode = entry.getValue();
                if (isUntrackedNode(rmNode.getHostName())) {
                    if (rmNode.getUntrackedTimeStamp() == 0) {
                        rmNode.setUntrackedTimeStamp(now);
                    } else if (now - rmNode.getUntrackedTimeStamp() > nodeRemovalTimeout) {
                        RMNode result = rmContext.getInactiveRMNodes().remove(nodeId);
                        if (result != null) {
                            decrInactiveNMMetrics(rmNode);
                            LOG.info("Removed " + result.getState().toString() + " node " + result.getHostName() + " from inactive nodes list");
                        }
                    }
                } else {
                    rmNode.setUntrackedTimeStamp(0);
                }
            }
        }
    }, nodeRemovalCheckInterval, nodeRemovalCheckInterval);
    super.serviceInit(conf);
}
Also used: Entry (java.util.Map.Entry), RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode), Timer (java.util.Timer), TimerTask (java.util.TimerTask), NodeId (org.apache.hadoop.yarn.api.records.NodeId), IOException (java.io.IOException), YarnException (org.apache.hadoop.yarn.exceptions.YarnException)
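
A minimal sketch of how the two knobs read by serviceInit can be set programmatically; the timeout value is an arbitrary example, and the check interval above then becomes min(timeout / 2, 600000) ms:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class NodesListManagerConfSketch {
    public static Configuration exampleConf() {
        Configuration conf = new YarnConfiguration();
        // A non-positive value makes serviceInit fall back to DirectResolver.
        conf.setInt(YarnConfiguration.RM_NODE_IP_CACHE_EXPIRY_INTERVAL_SECS, 0);
        // Remove untracked, inactive nodes after 10 minutes (illustrative value);
        // the removal timer then fires every min(600000 / 2, 600000) = 300000 ms.
        conf.setInt(YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC, 600000);
        return conf;
    }
}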

Example 69 with NodeId

use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.

the class ResourceTrackerService method registerNodeManager.

@SuppressWarnings("unchecked")
@Override
public RegisterNodeManagerResponse registerNodeManager(RegisterNodeManagerRequest request) throws YarnException, IOException {
    NodeId nodeId = request.getNodeId();
    String host = nodeId.getHost();
    int cmPort = nodeId.getPort();
    int httpPort = request.getHttpPort();
    Resource capability = request.getResource();
    String nodeManagerVersion = request.getNMVersion();
    Resource physicalResource = request.getPhysicalResource();
    RegisterNodeManagerResponse response = recordFactory.newRecordInstance(RegisterNodeManagerResponse.class);
    if (!minimumNodeManagerVersion.equals("NONE")) {
        if (minimumNodeManagerVersion.equals("EqualToRM")) {
            minimumNodeManagerVersion = YarnVersionInfo.getVersion();
        }
        if ((nodeManagerVersion == null) || (VersionUtil.compareVersions(nodeManagerVersion, minimumNodeManagerVersion)) < 0) {
            String message = "Disallowed NodeManager Version " + nodeManagerVersion + ", is less than the minimum version " + minimumNodeManagerVersion + " sending SHUTDOWN signal to " + "NodeManager.";
            LOG.info(message);
            response.setDiagnosticsMessage(message);
            response.setNodeAction(NodeAction.SHUTDOWN);
            return response;
        }
    }
    // Check if this node is a 'valid' node
    if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) {
        String message = "Disallowed NodeManager from  " + host + ", Sending SHUTDOWN signal to the NodeManager.";
        LOG.info(message);
        response.setDiagnosticsMessage(message);
        response.setNodeAction(NodeAction.SHUTDOWN);
        return response;
    }
    // check if node's capacity is load from dynamic-resources.xml
    String nid = nodeId.toString();
    Resource dynamicLoadCapability = loadNodeResourceFromDRConfiguration(nid);
    if (dynamicLoadCapability != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Resource for node: " + nid + " is adjusted from: " + capability + " to: " + dynamicLoadCapability + " due to settings in dynamic-resources.xml.");
        }
        capability = dynamicLoadCapability;
        // sync back with new resource.
        response.setResource(capability);
    }
    // Check if this node has minimum allocations
    if (capability.getMemorySize() < minAllocMb || capability.getVirtualCores() < minAllocVcores) {
        String message = "NodeManager from  " + host + " doesn't satisfy minimum allocations, Sending SHUTDOWN" + " signal to the NodeManager.";
        LOG.info(message);
        response.setDiagnosticsMessage(message);
        response.setNodeAction(NodeAction.SHUTDOWN);
        return response;
    }
    response.setContainerTokenMasterKey(containerTokenSecretManager.getCurrentKey());
    response.setNMTokenMasterKey(nmTokenSecretManager.getCurrentKey());
    RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort, resolve(host), capability, nodeManagerVersion, physicalResource);
    RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
    if (oldNode == null) {
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeStartedEvent(nodeId, request.getNMContainerStatuses(), request.getRunningApplications()));
    } else {
        LOG.info("Reconnect from the node at: " + host);
        this.nmLivelinessMonitor.unregister(nodeId);
        // Reset heartbeat ID since node just restarted.
        oldNode.resetLastNodeHeartBeatResponse();
        this.rmContext.getDispatcher().getEventHandler().handle(new RMNodeReconnectEvent(nodeId, rmNode, request.getRunningApplications(), request.getNMContainerStatuses()));
    }
    // On every node manager register we will be clearing NMToken keys if
    // present for any running application.
    this.nmTokenSecretManager.removeNodeKey(nodeId);
    this.nmLivelinessMonitor.register(nodeId);
    // RMNode inserted
    if (!rmContext.isWorkPreservingRecoveryEnabled()) {
        if (!request.getNMContainerStatuses().isEmpty()) {
            LOG.info("received container statuses on node manager register :" + request.getNMContainerStatuses());
            for (NMContainerStatus status : request.getNMContainerStatuses()) {
                handleNMContainerStatus(status, nodeId);
            }
        }
    }
    // Update node's labels to RM's NodeLabelManager.
    Set<String> nodeLabels = NodeLabelsUtils.convertToStringSet(request.getNodeLabels());
    if (isDistributedNodeLabelsConf && nodeLabels != null) {
        try {
            updateNodeLabelsFromNMReport(nodeLabels, nodeId);
            response.setAreNodeLabelsAcceptedByRM(true);
        } catch (IOException ex) {
            // Ensure the exception is captured in the response
            response.setDiagnosticsMessage(ex.getMessage());
            response.setAreNodeLabelsAcceptedByRM(false);
        }
    } else if (isDelegatedCentralizedNodeLabelsConf) {
        this.rmContext.getRMDelegatedNodeLabelsUpdater().updateNodeLabels(nodeId);
    }
    StringBuilder message = new StringBuilder();
    message.append("NodeManager from node ").append(host).append("(cmPort: ").append(cmPort).append(" httpPort: ");
    message.append(httpPort).append(") ").append("registered with capability: ").append(capability);
    message.append(", assigned nodeId ").append(nodeId);
    if (response.getAreNodeLabelsAcceptedByRM()) {
        message.append(", node labels { ").append(StringUtils.join(",", nodeLabels) + " } ");
    }
    LOG.info(message.toString());
    response.setNodeAction(NodeAction.NORMAL);
    response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
    response.setRMVersion(YarnVersionInfo.getVersion());
    return response;
}
Also used: RMNodeReconnectEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeReconnectEvent), Resource (org.apache.hadoop.yarn.api.records.Resource), IOException (java.io.IOException), RMNodeStartedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStartedEvent), RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode), NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus), NodeId (org.apache.hadoop.yarn.api.records.NodeId), RegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse), UnRegisterNodeManagerResponse (org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerResponse), RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)
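
registerNodeManager keys the dynamic-resources lookup on nodeId.toString(); a small sketch of that host:port string form (the host and port are made up, and NodeId.fromString is assumed to be available in the Hadoop release at hand):

import org.apache.hadoop.yarn.api.records.NodeId;

public class NodeIdStringFormSketch {
    public static void main(String[] args) {
        // Hypothetical NodeManager host and RPC port.
        NodeId nodeId = NodeId.newInstance("worker-7.example.com", 45454);
        // "worker-7.example.com:45454" is the form used as the key into
        // dynamic-resources.xml in the example above.
        String nid = nodeId.toString();
        // Round-trip back to a NodeId (NodeId.fromString assumed available).
        NodeId parsed = NodeId.fromString(nid);
        System.out.println(nid + " -> " + parsed.getHost() + ":" + parsed.getPort());
    }
}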

Example 70 with NodeId

use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.

the class AdminService method checkForDecommissioningNodes.

@Override
public CheckForDecommissioningNodesResponse checkForDecommissioningNodes(CheckForDecommissioningNodesRequest checkForDecommissioningNodesRequest) throws IOException, YarnException {
    final String operation = "checkForDecommissioningNodes";
    final String msg = "check for decommissioning nodes.";
    UserGroupInformation user = checkAcls("checkForDecommissioningNodes");
    checkRMStatus(user.getShortUserName(), operation, msg);
    Set<NodeId> decommissioningNodes = rmContext.getNodesListManager().checkForDecommissioningNodes();
    RMAuditLogger.logSuccess(user.getShortUserName(), operation, "AdminService");
    CheckForDecommissioningNodesResponse response = recordFactory.newRecordInstance(CheckForDecommissioningNodesResponse.class);
    response.setDecommissioningNodes(decommissioningNodes);
    return response;
}
Also used: NodeId (org.apache.hadoop.yarn.api.records.NodeId), CheckForDecommissioningNodesResponse (org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesResponse), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
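
A minimal sketch of a caller consuming this response, assuming the matching getDecommissioningNodes getter on the record:

import java.util.Set;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesResponse;

// Sketch only: report which nodes are still draining before a graceful
// decommission completes.
public static void printDecommissioningNodes(CheckForDecommissioningNodesResponse response) {
    Set<NodeId> nodes = response.getDecommissioningNodes();
    for (NodeId nodeId : nodes) {
        System.out.println("Still decommissioning: " + nodeId.getHost() + ":" + nodeId.getPort());
    }
}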

Aggregations

NodeId (org.apache.hadoop.yarn.api.records.NodeId): 257
Test (org.junit.Test): 137
Resource (org.apache.hadoop.yarn.api.records.Resource): 89
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 74
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 59
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 46
Container (org.apache.hadoop.yarn.api.records.Container): 44
ArrayList (java.util.ArrayList): 43
Configuration (org.apache.hadoop.conf.Configuration): 40
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 40
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 40
HashSet (java.util.HashSet): 39
Set (java.util.Set): 36
HashMap (java.util.HashMap): 35
FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 34
Priority (org.apache.hadoop.yarn.api.records.Priority): 32
FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 31
IOException (java.io.IOException): 29
ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 29
RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode): 28