Use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
The class AdminService, method refreshNodesResources.

@Override
public RefreshNodesResourcesResponse refreshNodesResources(
    RefreshNodesResourcesRequest request) throws YarnException, StandbyException {
  final String operation = "refreshNodesResources";
  UserGroupInformation user = checkAcls(operation);
  final String msg = "refresh nodes.";
  checkRMStatus(user.getShortUserName(), operation, msg);
  RefreshNodesResourcesResponse response =
      recordFactory.newRecordInstance(RefreshNodesResourcesResponse.class);
  try {
    Configuration conf = getConfig();
    Configuration configuration = new Configuration(conf);
    DynamicResourceConfiguration newConf;
    InputStream drInputStream = this.rmContext.getConfigurationProvider()
        .getConfigurationInputStream(configuration, YarnConfiguration.DR_CONFIGURATION_FILE);
    if (drInputStream != null) {
      newConf = new DynamicResourceConfiguration(configuration, drInputStream);
    } else {
      newConf = new DynamicResourceConfiguration(configuration);
    }
    if (newConf.getNodes() != null && newConf.getNodes().length != 0) {
      Map<NodeId, ResourceOption> nodeResourceMap = newConf.getNodeResourceMap();
      UpdateNodeResourceRequest updateRequest =
          UpdateNodeResourceRequest.newInstance(nodeResourceMap);
      updateNodeResource(updateRequest);
    }
    // refresh dynamic resource in ResourceTrackerService
    this.rmContext.getResourceTrackerService().updateDynamicResourceConfiguration(newConf);
    RMAuditLogger.logSuccess(user.getShortUserName(), operation, "AdminService");
    return response;
  } catch (IOException ioe) {
    throw logAndWrapException(ioe, user.getShortUserName(), operation, msg);
  }
}
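For context, a minimal standalone sketch (not from the Hadoop source) of the Map<NodeId, ResourceOption> shape that UpdateNodeResourceRequest.newInstance consumes above; the host name, port, and resource sizes are illustrative.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;

public class NodeResourceMapSketch {
  public static void main(String[] args) {
    // Build the same Map<NodeId, ResourceOption> shape that
    // DynamicResourceConfiguration.getNodeResourceMap() produces.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<>();
    NodeId nodeId = NodeId.newInstance("nm1.example.com", 45454); // hypothetical node
    Resource resource = Resource.newInstance(8192, 8);            // 8 GB, 8 vcores
    // -1 disables the over-commit timeout (ResourceOption's default).
    nodeResourceMap.put(nodeId, ResourceOption.newInstance(resource, -1));
    // The request handed to AdminService#updateNodeResource above.
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    System.out.println("Updating resources for "
        + request.getNodeResourceMap().size() + " node(s)");
  }
}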
Use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
The class NodesListManager, method setDecomissionedNMs.

private void setDecomissionedNMs() {
  Set<String> excludeList = hostsReader.getExcludedHosts();
  for (final String host : excludeList) {
    NodeId nodeId = createUnknownNodeId(host);
    RMNodeImpl rmNode = new RMNodeImpl(nodeId, rmContext, host, -1, -1,
        new UnknownNode(host), Resource.newInstance(0, 0), "unknown");
    rmContext.getInactiveRMNodes().put(nodeId, rmNode);
    rmNode.handle(new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION));
  }
}
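A minimal sketch, assuming createUnknownNodeId simply pairs the excluded host with a placeholder port of -1 (consistent with the -1 ports passed to RMNodeImpl above); the host name and the simplified map stand in for the RM context and are not from the Hadoop source.

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.yarn.api.records.NodeId;

public class UnknownNodeIdSketch {
  // Placeholder NodeId for an excluded host whose NodeManager port is not
  // known yet (assumed behavior of createUnknownNodeId above).
  static NodeId createUnknownNodeId(String host) {
    return NodeId.newInstance(host, -1);
  }

  public static void main(String[] args) {
    Set<String> excludeList = new HashSet<>(Arrays.asList("worker-07.example.com"));
    Map<NodeId, String> inactive = new HashMap<>(); // stand-in for the inactive RMNodes map
    for (String host : excludeList) {
      NodeId nodeId = createUnknownNodeId(host);
      inactive.put(nodeId, host);
      System.out.println("Marked as decommissioned: " + nodeId); // "worker-07.example.com:-1"
    }
  }
}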
Use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
The class NodesListManager, method serviceInit.

@Override
protected void serviceInit(Configuration conf) throws Exception {
  this.conf = conf;
  int nodeIpCacheTimeout = conf.getInt(
      YarnConfiguration.RM_NODE_IP_CACHE_EXPIRY_INTERVAL_SECS,
      YarnConfiguration.DEFAULT_RM_NODE_IP_CACHE_EXPIRY_INTERVAL_SECS);
  if (nodeIpCacheTimeout <= 0) {
    resolver = new DirectResolver();
  } else {
    resolver = new CachedResolver(SystemClock.getInstance(), nodeIpCacheTimeout);
    addIfService(resolver);
  }
  // Read the hosts/exclude files to restrict access to the RM
  try {
    this.includesFile = conf.get(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH,
        YarnConfiguration.DEFAULT_RM_NODES_INCLUDE_FILE_PATH);
    this.excludesFile = conf.get(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
        YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH);
    this.hostsReader = createHostsFileReader(this.includesFile, this.excludesFile);
    setDecomissionedNMs();
    printConfiguredHosts();
  } catch (YarnException ex) {
    disableHostsFileReader(ex);
  } catch (IOException ioe) {
    disableHostsFileReader(ioe);
  }
  final int nodeRemovalTimeout = conf.getInt(
      YarnConfiguration.RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC,
      YarnConfiguration.DEFAULT_RM_NODEMANAGER_UNTRACKED_REMOVAL_TIMEOUT_MSEC);
  nodeRemovalCheckInterval = (Math.min(nodeRemovalTimeout / 2, 600000));
  removalTimer = new Timer("Node Removal Timer");
  removalTimer.schedule(new TimerTask() {

    @Override
    public void run() {
      long now = Time.monotonicNow();
      for (Map.Entry<NodeId, RMNode> entry : rmContext.getInactiveRMNodes().entrySet()) {
        NodeId nodeId = entry.getKey();
        RMNode rmNode = entry.getValue();
        if (isUntrackedNode(rmNode.getHostName())) {
          if (rmNode.getUntrackedTimeStamp() == 0) {
            rmNode.setUntrackedTimeStamp(now);
          } else if (now - rmNode.getUntrackedTimeStamp() > nodeRemovalTimeout) {
            RMNode result = rmContext.getInactiveRMNodes().remove(nodeId);
            if (result != null) {
              decrInactiveNMMetrics(rmNode);
              LOG.info("Removed " + result.getState().toString() + " node "
                  + result.getHostName() + " from inactive nodes list");
            }
          }
        } else {
          rmNode.setUntrackedTimeStamp(0);
        }
      }
    }
  }, nodeRemovalCheckInterval, nodeRemovalCheckInterval);
  super.serviceInit(conf);
}
Use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
The class ResourceTrackerService, method registerNodeManager.

@SuppressWarnings("unchecked")
@Override
public RegisterNodeManagerResponse registerNodeManager(
    RegisterNodeManagerRequest request) throws YarnException, IOException {
  NodeId nodeId = request.getNodeId();
  String host = nodeId.getHost();
  int cmPort = nodeId.getPort();
  int httpPort = request.getHttpPort();
  Resource capability = request.getResource();
  String nodeManagerVersion = request.getNMVersion();
  Resource physicalResource = request.getPhysicalResource();
  RegisterNodeManagerResponse response =
      recordFactory.newRecordInstance(RegisterNodeManagerResponse.class);
  if (!minimumNodeManagerVersion.equals("NONE")) {
    if (minimumNodeManagerVersion.equals("EqualToRM")) {
      minimumNodeManagerVersion = YarnVersionInfo.getVersion();
    }
    if ((nodeManagerVersion == null)
        || (VersionUtil.compareVersions(nodeManagerVersion, minimumNodeManagerVersion)) < 0) {
      String message = "Disallowed NodeManager Version " + nodeManagerVersion
          + ", is less than the minimum version " + minimumNodeManagerVersion
          + " sending SHUTDOWN signal to " + "NodeManager.";
      LOG.info(message);
      response.setDiagnosticsMessage(message);
      response.setNodeAction(NodeAction.SHUTDOWN);
      return response;
    }
  }
  // Check if this node is a 'valid' node
  if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) {
    String message = "Disallowed NodeManager from " + host
        + ", Sending SHUTDOWN signal to the NodeManager.";
    LOG.info(message);
    response.setDiagnosticsMessage(message);
    response.setNodeAction(NodeAction.SHUTDOWN);
    return response;
  }
  // Check if the node's capacity is loaded from dynamic-resources.xml
  String nid = nodeId.toString();
  Resource dynamicLoadCapability = loadNodeResourceFromDRConfiguration(nid);
  if (dynamicLoadCapability != null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Resource for node: " + nid + " is adjusted from: " + capability
          + " to: " + dynamicLoadCapability + " due to settings in dynamic-resources.xml.");
    }
    capability = dynamicLoadCapability;
    // Sync back with the new resource.
    response.setResource(capability);
  }
  // Check if this node has minimum allocations
  if (capability.getMemorySize() < minAllocMb
      || capability.getVirtualCores() < minAllocVcores) {
    String message = "NodeManager from " + host
        + " doesn't satisfy minimum allocations, Sending SHUTDOWN"
        + " signal to the NodeManager.";
    LOG.info(message);
    response.setDiagnosticsMessage(message);
    response.setNodeAction(NodeAction.SHUTDOWN);
    return response;
  }
  response.setContainerTokenMasterKey(containerTokenSecretManager.getCurrentKey());
  response.setNMTokenMasterKey(nmTokenSecretManager.getCurrentKey());
  RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort,
      resolve(host), capability, nodeManagerVersion, physicalResource);
  RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
  if (oldNode == null) {
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeStartedEvent(nodeId, request.getNMContainerStatuses(),
            request.getRunningApplications()));
  } else {
    LOG.info("Reconnect from the node at: " + host);
    this.nmLivelinessMonitor.unregister(nodeId);
    // Reset the heartbeat ID since the node just restarted.
    oldNode.resetLastNodeHeartBeatResponse();
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeReconnectEvent(nodeId, rmNode, request.getRunningApplications(),
            request.getNMContainerStatuses()));
  }
  // On every NodeManager registration, clear NMToken keys if present for any
  // running application.
  this.nmTokenSecretManager.removeNodeKey(nodeId);
  this.nmLivelinessMonitor.register(nodeId);
  // RMNode inserted
  if (!rmContext.isWorkPreservingRecoveryEnabled()) {
    if (!request.getNMContainerStatuses().isEmpty()) {
      LOG.info("received container statuses on node manager register :"
          + request.getNMContainerStatuses());
      for (NMContainerStatus status : request.getNMContainerStatuses()) {
        handleNMContainerStatus(status, nodeId);
      }
    }
  }
  // Update the node's labels in the RM's NodeLabelManager.
  Set<String> nodeLabels = NodeLabelsUtils.convertToStringSet(request.getNodeLabels());
  if (isDistributedNodeLabelsConf && nodeLabels != null) {
    try {
      updateNodeLabelsFromNMReport(nodeLabels, nodeId);
      response.setAreNodeLabelsAcceptedByRM(true);
    } catch (IOException ex) {
      // Ensure the exception is captured in the response.
      response.setDiagnosticsMessage(ex.getMessage());
      response.setAreNodeLabelsAcceptedByRM(false);
    }
  } else if (isDelegatedCentralizedNodeLabelsConf) {
    this.rmContext.getRMDelegatedNodeLabelsUpdater().updateNodeLabels(nodeId);
  }
  StringBuilder message = new StringBuilder();
  message.append("NodeManager from node ").append(host)
      .append("(cmPort: ").append(cmPort).append(" httpPort: ");
  message.append(httpPort).append(") ")
      .append("registered with capability: ").append(capability);
  message.append(", assigned nodeId ").append(nodeId);
  if (response.getAreNodeLabelsAcceptedByRM()) {
    message.append(", node labels { ").append(StringUtils.join(",", nodeLabels) + " } ");
  }
  LOG.info(message.toString());
  response.setNodeAction(NodeAction.NORMAL);
  response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
  response.setRMVersion(YarnVersionInfo.getVersion());
  return response;
}
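registerNodeManager reads the host and RPC port out of the NodeId and keys the dynamic-resources lookup on its string form; a minimal standalone sketch of those accessors, with hypothetical host and port values.

import org.apache.hadoop.yarn.api.records.NodeId;

public class NodeIdSketch {
  public static void main(String[] args) {
    // The NodeId carries the NodeManager's host and RPC port; its "host:port"
    // string form is the key used by loadNodeResourceFromDRConfiguration above.
    NodeId nodeId = NodeId.newInstance("nm-host.example.com", 45454); // hypothetical values
    String host = nodeId.getHost();   // "nm-host.example.com"
    int cmPort = nodeId.getPort();    // 45454
    String nid = nodeId.toString();   // "nm-host.example.com:45454"
    System.out.println(host + " / " + cmPort + " / " + nid);
  }
}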
Use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
The class AdminService, method checkForDecommissioningNodes.

@Override
public CheckForDecommissioningNodesResponse checkForDecommissioningNodes(
    CheckForDecommissioningNodesRequest checkForDecommissioningNodesRequest)
    throws IOException, YarnException {
  final String operation = "checkForDecommissioningNodes";
  final String msg = "check for decommissioning nodes.";
  UserGroupInformation user = checkAcls("checkForDecommissioningNodes");
  checkRMStatus(user.getShortUserName(), operation, msg);
  Set<NodeId> decommissioningNodes =
      rmContext.getNodesListManager().checkForDecommissioningNodes();
  RMAuditLogger.logSuccess(user.getShortUserName(), operation, "AdminService");
  CheckForDecommissioningNodesResponse response =
      recordFactory.newRecordInstance(CheckForDecommissioningNodesResponse.class);
  response.setDecommissioningNodes(decommissioningNodes);
  return response;
}