
Example 26 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project ignite by apache.

Class: ApplicationMaster. Method: onNodesUpdated.

/**
 * {@inheritDoc}
 */
public synchronized void onNodesUpdated(List<NodeReport> updated) {
    for (NodeReport node : updated) {
        // Drop every container tracked on a node that has become unusable.
        if (node.getNodeState().isUnusable()) {
            for (IgniteContainer cont : containers.values()) {
                // Removing while iterating values() assumes containers is a concurrent map.
                if (cont.nodeId().equals(node.getNodeId()))
                    containers.remove(cont.id());
            }
            log.log(Level.WARNING, "Node is unusable. Node: {0}, state: {1}.",
                new Object[] { node.getNodeId().getHost(), node.getNodeState() });
        }
    }
}
Also used : NodeReport(org.apache.hadoop.yarn.api.records.NodeReport)
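
For reference, NodeState.isUnusable() in Hadoop 2.x returns true for the UNHEALTHY, DECOMMISSIONED, and LOST states. A minimal equivalent check (the helper name is illustrative, not part of the Ignite code):

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;

// Equivalent of node.getNodeState().isUnusable() in Hadoop 2.x; newer Hadoop
// versions may treat additional states (e.g. SHUTDOWN) as unusable.
static boolean isNodeUnusable(NodeReport node) {
    NodeState state = node.getNodeState();
    return state == NodeState.UNHEALTHY
        || state == NodeState.DECOMMISSIONED
        || state == NodeState.LOST;
}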

Example 27 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project incubator-systemml by apache.

Class: YarnClusterAnalyzer. Method: analyzeYarnCluster.

/**
 * Analyzes properties of Yarn cluster and Hadoop configurations.
 *
 * @param yarnClient hadoop yarn client
 * @param conf hadoop yarn configuration
 * @param verbose output info to standard output
 */
public static void analyzeYarnCluster(YarnClient yarnClient, YarnConfiguration conf, boolean verbose) {
    try {
        List<NodeReport> nodesReport = yarnClient.getNodeReports();
        if (verbose)
            System.out.println("There are " + nodesReport.size() + " nodes in the cluster");
        if (nodesReport.isEmpty())
            throw new YarnException("There are zero available nodes in the yarn cluster");
        nodesMaxPhySorted = new ArrayList<>(nodesReport.size());
        clusterTotalMem = 0;
        clusterTotalCores = 0;
        clusterTotalNodes = 0;
        minimumMRContainerPhyMB = -1;
        for (NodeReport node : nodesReport) {
            Resource resource = node.getCapability();
            Resource used = node.getUsed();
            if (used == null)
                used = Resource.newInstance(0, 0);
            int mb = resource.getMemory();
            int cores = resource.getVirtualCores();
            if (mb <= 0)
                throw new YarnException("A node has non-positive memory " + mb);
            int myMinMRPhyMB = mb / cores / CPU_HYPER_FACTOR;
            // minimumMRContainerPhyMB needs to be the largest among the mins
            if (minimumMRContainerPhyMB < myMinMRPhyMB)
                minimumMRContainerPhyMB = myMinMRPhyMB;
            clusterTotalMem += (long) mb * 1024 * 1024;
            nodesMaxPhySorted.add((long) mb * 1024 * 1024);
            clusterTotalCores += cores;
            clusterTotalNodes++;
            if (verbose)
                System.out.println("\t" + node.getNodeId() + " has " + mb + " MB (" + used.getMemory() + " MB used) memory and " + resource.getVirtualCores() + " (" + used.getVirtualCores() + " used) cores");
        }
        Collections.sort(nodesMaxPhySorted, Collections.reverseOrder());
        nodesMaxBudgetSorted = new ArrayList<>(nodesMaxPhySorted.size());
        for (int i = 0; i < nodesMaxPhySorted.size(); i++)
            nodesMaxBudgetSorted.add(ResourceOptimizer.phyToBudget(nodesMaxPhySorted.get(i)));
        _remotePar = nodesReport.size();
        if (_remotePar == 0)
            throw new YarnException("There are no available nodes in the yarn cluster");
        // Now get the default cluster settings
        // 100MB
        _remoteMRSortMem = (1024 * 1024) * conf.getLong(MRConfigurationNames.MR_TASK_IO_SORT_MB, 100);
        // handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
        // (for robustness we probe both: child and map configuration parameters)
        // internally mapred/mapreduce synonym
        String javaOpts1 = conf.get(MRConfigurationNames.MR_CHILD_JAVA_OPTS);
        // internally mapred/mapreduce synonym
        String javaOpts2 = conf.get(MRConfigurationNames.MR_MAP_JAVA_OPTS, null);
        // internally mapred/mapreduce synonym
        String javaOpts3 = conf.get(MRConfigurationNames.MR_REDUCE_JAVA_OPTS, null);
        // specific value overrides generic
        if (javaOpts2 != null)
            _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts2);
        else
            _remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts1);
        // specific value overrides generic
        if (javaOpts3 != null)
            _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts3);
        else
            _remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);
        // HDFS blocksize
        String blocksize = conf.get(MRConfigurationNames.DFS_BLOCKSIZE, "134217728");
        _blocksize = Long.parseLong(blocksize);
        minimalPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
        maximumPhyAllocate = (long) 1024 * 1024 * conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
        mrAMPhy = (long) conf.getInt(MRConfigurationNames.YARN_APP_MR_AM_RESOURCE_MB, 1536) * 1024 * 1024;
    } catch (Exception e) {
        throw new RuntimeException("Unable to analyze yarn cluster ", e);
    }
    /*
     * This is for the AppMaster to query available resources in the cluster during heartbeat:
     *
     * AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
     * rmClient.init(conf);
     * rmClient.start();
     * AllocateResponse response = rmClient.allocate(0);
     * int nodeCount = response.getNumClusterNodes();
     * Resource resource = response.getAvailableResources();
     * List<NodeReport> nodeUpdate = response.getUpdatedNodes();
     *
     * LOG.info("This is a " + nodeCount + " node cluster with totally " +
     *     resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores");
     * LOG.info(nodeUpdate.size() + " updatedNode reports received");
     * for (NodeReport node : nodeUpdate) {
     *     resource = node.getCapability();
     *     LOG.info(node.getNodeId() + " updated with " + resource.getMemory() + " memory and " + resource.getVirtualCores() + " cores");
     * }
     */
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException)
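
The extractMaxMemoryOpt helper is referenced but not shown above. A plausible sketch of such a helper, parsing the -Xmx value out of a JVM options string (the parsing details are assumptions, not the SystemML implementation):

// Hypothetical sketch: extract the -Xmx value in bytes from a JVM opts string.
// Returns -1 if no -Xmx flag is present.
static long extractMaxMemoryOpt(String javaOpts) {
    if (javaOpts == null)
        return -1;
    for (String opt : javaOpts.split("\\s+")) {
        if (opt.startsWith("-Xmx")) {
            String val = opt.substring(4).toLowerCase();
            long factor = 1;
            if (val.endsWith("g")) { factor = 1024L * 1024 * 1024; val = val.substring(0, val.length() - 1); }
            else if (val.endsWith("m")) { factor = 1024L * 1024; val = val.substring(0, val.length() - 1); }
            else if (val.endsWith("k")) { factor = 1024L; val = val.substring(0, val.length() - 1); }
            return Long.parseLong(val) * factor;
        }
    }
    return -1;
}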

Example 28 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project cdap by caskdata.

Class: YarnNodes. Method: collect.

@Override
public synchronized void collect() throws Exception {
    reset();
    List<NodeReport> nodeReports;
    YarnClient yarnClient = createYARNClient();
    try {
        nodeReports = yarnClient.getNodeReports();
    } finally {
        yarnClient.stop();
    }
    for (NodeReport nodeReport : nodeReports) {
        switch (nodeReport.getNodeState()) {
            case RUNNING:
                healthyNodes++;
                healthyContainers += nodeReport.getNumContainers();
                break;
            case UNHEALTHY:
            case DECOMMISSIONED:
            case LOST:
                unusableNodes++;
                unusableContainers += nodeReport.getNumContainers();
                break;
            case NEW:
            case REBOOTED:
                newNodes++;
                newContainers += nodeReport.getNumContainers();
                break;
        }
    }
}
Also used : NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient)
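
The createYARNClient() helper is not shown in this example. The standard way to obtain a started client looks like the sketch below; whether CDAP builds the configuration this way is an assumption:

import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

// Plausible shape of createYARNClient(): create, init, and start a YarnClient
// against the cluster configuration found on the classpath.
static YarnClient createYARNClient() {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new YarnConfiguration());
    yarnClient.start();
    return yarnClient;
}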

Example 29 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project cdap by caskdata.

Class: YarnResources. Method: collect.

@Override
public synchronized void collect() throws Exception {
    reset();
    List<NodeReport> nodeReports;
    YarnClient yarnClient = createYARNClient();
    try {
        nodeReports = yarnClient.getNodeReports();
    } finally {
        yarnClient.stop();
    }
    for (NodeReport nodeReport : nodeReports) {
        NodeId nodeId = nodeReport.getNodeId();
        LOG.debug("Got report for node {}", nodeId);
        if (!nodeReport.getNodeState().isUnusable()) {
            Resource nodeCapability = nodeReport.getCapability();
            Resource nodeUsed = nodeReport.getUsed();
            // some versions of hadoop return null, others do not
            if (nodeCapability != null) {
                LOG.debug("node {} resource capability: memory = {}, vcores = {}", nodeId, nodeCapability.getMemory(), nodeCapability.getVirtualCores());
                totalMemory += nodeCapability.getMemory();
                totalVCores += nodeCapability.getVirtualCores();
            }
            if (nodeUsed != null) {
                LOG.debug("node {} resources used: memory = {}, vcores = {}", nodeId, nodeUsed.getMemory(), nodeUsed.getVirtualCores());
                usedMemory += nodeUsed.getMemory();
                usedVCores += nodeUsed.getVirtualCores();
            }
        }
    }
}
Also used : NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient)
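
A natural companion to this loop is deriving per-node headroom from the same two Resource objects. A minimal sketch (illustrative, not part of the CDAP code; Resource#getMemory() returns MB in Hadoop 2.x, while newer versions prefer getMemorySize()):

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Resource;

// Illustrative helper: free memory in MB on a node, computed as capability minus used.
static long freeMemoryMb(NodeReport nodeReport) {
    Resource capability = nodeReport.getCapability();
    Resource used = nodeReport.getUsed();
    if (capability == null)
        return 0; // some Hadoop versions return null here, as noted above
    long usedMb = (used == null) ? 0 : used.getMemory();
    return capability.getMemory() - usedMb;
}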

Example 30 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project hive by apache.

Class: LlapTaskSchedulerService. Method: addNode.

private void addNode(NodeInfo node, ServiceInstance serviceInstance) {
    // we have just added a new node. Signal timeout monitor to reset timer
    if (activeInstances.size() != 0 && timeoutFutureRef.get() != null) {
        LOG.info("New node added. Signalling scheduler timeout monitor thread to stop timer.");
        stopTimeoutMonitor();
    }
    NodeReport nodeReport = constructNodeReport(serviceInstance, true);
    getContext().nodesUpdated(Collections.singletonList(nodeReport));
    // When the same node goes away and comes back... the old entry will be lost - which means
    // we don't know how many fragments we have actually scheduled on this node.
    // Replacing it is the right thing to do though, since we expect the AM to kill all the fragments running on the node, via timeouts.
    // De-allocate messages coming in from the old node are sent to the NodeInfo instance for the old node.
    instanceToNodeMap.put(node.getNodeIdentity(), node);
    if (metrics != null) {
        metrics.setClusterNodeCount(activeInstances.size());
    }
    // Trigger scheduling since a new node became available.
    LOG.info("Adding new node: {}. TotalNodeCount={}. activeInstances.size={}", node, instanceToNodeMap.size(), activeInstances.size());
    trySchedulingPendingTasks();
}
Also used : NodeReport(org.apache.hadoop.yarn.api.records.NodeReport)
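
The constructNodeReport helper is private to the Hive scheduler and not shown. A plausible sketch using the NodeReport.newInstance factory (annotated @Private in Hadoop; the field values below are assumptions, not Hive's implementation):

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;

// Hypothetical sketch: synthesize a NodeReport for a host so the scheduling
// framework sees it as a usable (or unusable) node.
static NodeReport constructNodeReport(String host, int port, boolean healthy) {
    NodeId nodeId = NodeId.newInstance(host, port);
    NodeState state = healthy ? NodeState.RUNNING : NodeState.LOST;
    Resource none = Resource.newInstance(0, 0);
    return NodeReport.newInstance(nodeId, state, /* httpAddress */ null,
        /* rackName */ null, /* used */ none, /* capability */ none,
        /* numContainers */ 0, /* healthReport */ "", /* lastHealthReportTime */ 0L);
}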

Aggregations

NodeReport (org.apache.hadoop.yarn.api.records.NodeReport): 49
Test (org.junit.Test): 18
ArrayList (java.util.ArrayList): 17
Resource (org.apache.hadoop.yarn.api.records.Resource): 14
GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator): 10
LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan): 10
ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest): 9
MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent): 9
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 9
File (java.io.File): 8
NodeState (org.apache.hadoop.yarn.api.records.NodeState): 7
HashMap (java.util.HashMap): 6
HashSet (java.util.HashSet): 6
Priority (org.apache.hadoop.yarn.api.records.Priority): 6
IOException (java.io.IOException): 5
Map (java.util.Map): 5
Container (org.apache.hadoop.yarn.api.records.Container): 5
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 5
PrintWriter (java.io.PrintWriter): 4
Configuration (org.apache.hadoop.conf.Configuration): 4