
Example 21 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project apex-core by apache.

Class HostLocalTest, method testPartitionLocality.

@Test
public void testPartitionLocality() {
    int partitionCount = 3;
    LogicalPlan dag = new LogicalPlan();
    dag.getAttributes().put(com.datatorrent.api.Context.DAGContext.APPLICATION_PATH, new File("target", HostLocalTest.class.getName()).getAbsolutePath());
    dag.setAttribute(OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
    GenericTestOperator o1 = dag.addOperator("o1", GenericTestOperator.class);
    GenericTestOperator partitioned = dag.addOperator("partitioned", GenericTestOperator.class);
    LocalityPartitioner partitioner = new LocalityPartitioner();
    partitioner.setPartitionCount(partitionCount);
    dag.getMeta(partitioned).getAttributes().put(OperatorContext.PARTITIONER, partitioner);
    dag.addStream("o1_outport1", o1.outport1, partitioned.inport1);
    StreamingContainerManager scm = new StreamingContainerManager(dag);
    ResourceRequestHandler rr = new ResourceRequestHandler();
    int containerMem = 1000;
    Map<String, NodeReport> nodeReports = Maps.newHashMap();
    for (int i = 0; i < partitionCount; i++) {
        NodeReport nr = BuilderUtils.newNodeReport(BuilderUtils.newNodeId("host" + (i + 1), 0), NodeState.RUNNING, "httpAddress", "rackName", BuilderUtils.newResource(0, 0), BuilderUtils.newResource(containerMem * 2, 2), 0, null, 0);
        nodeReports.put(nr.getNodeId().getHost(), nr);
    }
    // set resources
    rr.updateNodeReports(Lists.newArrayList(nodeReports.values()));
    Set<String> expectedHosts = Sets.newHashSet();
    for (int i = 0; i < partitionCount; i++) {
        expectedHosts.add("host" + (i + 1));
    }
    for (ContainerStartRequest csr : scm.containerStartRequests) {
        String host = rr.getHost(csr, true);
        if (host != null) {
            expectedHosts.remove(host);
        }
    }
    Assert.assertTrue("All the allocated hosts removed", expectedHosts.isEmpty());
}
Also used: ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest), GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator), MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent), LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan), File (java.io.File), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport), Test (org.junit.Test)
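
For context, the synthetic reports above come from BuilderUtils, YARN's internal record factory. A minimal standalone sketch of the same construction, assuming the Hadoop 2.x layout where BuilderUtils lives in hadoop-yarn-server-common (the host and resource values here are hypothetical):

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;

public class NodeReportSketch {
    public static void main(String[] args) {
        // Build a report for a healthy node with 2000 MB / 2 vcores capacity
        // and nothing in use, mirroring the loop in testPartitionLocality.
        NodeReport nr = BuilderUtils.newNodeReport(
            BuilderUtils.newNodeId("host1", 0), // node id: host plus RPC port
            NodeState.RUNNING,                  // node state
            "httpAddress",                      // NM web address (placeholder)
            "rackName",                         // rack (placeholder)
            BuilderUtils.newResource(0, 0),     // resources currently used
            BuilderUtils.newResource(2000, 2),  // total capability: MB, vcores
            0,                                  // running containers
            null,                               // health report
            0);                                 // last health report timestamp
        System.out.println(nr.getNodeId().getHost() + " -> " + nr.getCapability());
    }
}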

Example 22 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project apex-core by apache.

Class StramMiniClusterTest, method testSetupShutdown.

@Test
public void testSetupShutdown() throws Exception {
    GetClusterNodesRequest request = Records.newRecord(GetClusterNodesRequest.class);
    ClientRMService clientRMService = yarnCluster.getResourceManager().getClientRMService();
    GetClusterNodesResponse response = clientRMService.getClusterNodes(request);
    List<NodeReport> nodeReports = response.getNodeReports();
    LOG.info("{}", nodeReports);
    for (NodeReport nr : nodeReports) {
        LOG.info("Node: {}", nr.getNodeId());
        LOG.info("Total memory: {}", nr.getCapability());
        LOG.info("Used memory: {}", nr.getUsed());
        LOG.info("Number containers: {}", nr.getNumContainers());
    }
    JarHelper jarHelper = new JarHelper();
    LOG.info("engine jar: {}", jarHelper.getJar(StreamingAppMaster.class));
    LOG.info("engine test jar: {}", jarHelper.getJar(StramMiniClusterTest.class));
    // create test application
    Properties dagProps = new Properties();
    // input module (ensure shutdown works while windows are generated)
    dagProps.put(StreamingApplication.APEX_PREFIX + "operator.numGen.classname", TestGeneratorInputOperator.class.getName());
    dagProps.put(StreamingApplication.APEX_PREFIX + "operator.numGen.maxTuples", "1");
    dagProps.put(StreamingApplication.APEX_PREFIX + "operator.module1.classname", GenericTestOperator.class.getName());
    dagProps.put(StreamingApplication.APEX_PREFIX + "operator.module2.classname", GenericTestOperator.class.getName());
    dagProps.put(StreamingApplication.APEX_PREFIX + "stream.fromNumGen.source", "numGen.outport");
    dagProps.put(StreamingApplication.APEX_PREFIX + "stream.fromNumGen.sinks", "module1.inport1");
    dagProps.put(StreamingApplication.APEX_PREFIX + "stream.n1n2.source", "module1.outport1");
    dagProps.put(StreamingApplication.APEX_PREFIX + "stream.n1n2.sinks", "module2.inport1");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + LogicalPlan.MASTER_MEMORY_MB.getName(), "128");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + LogicalPlan.CONTAINER_JVM_OPTIONS.getName(), "-Dlog4j.properties=custom_log4j.properties");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + "operator.*." + OperatorContext.MEMORY_MB.getName(), "64");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + "operator.*." + OperatorContext.VCORES.getName(), "1");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + "operator.*.port.*." + Context.PortContext.BUFFER_MEMORY_MB.getName(), "32");
    dagProps.setProperty(StreamingApplication.APEX_PREFIX + LogicalPlan.DEBUG.getName(), "true");
    LOG.info("dag properties: {}", dagProps);
    LOG.info("Initializing Client");
    LogicalPlanConfiguration tb = new LogicalPlanConfiguration(conf);
    tb.addFromProperties(dagProps, null);
    LogicalPlan dag = createDAG(tb);
    Configuration yarnConf = new Configuration(yarnCluster.getConfig());
    StramClient client = new StramClient(yarnConf, dag);
    try {
        client.start();
        if (StringUtils.isBlank(System.getenv("JAVA_HOME"))) {
            // JAVA_HOME not set in the yarn mini cluster
            client.javaCmd = "java";
        }
        LOG.info("Running client");
        client.startApplication();
        boolean result = client.monitorApplication();
        LOG.info("Client run completed. Result=" + result);
        Assert.assertTrue(result);
    } finally {
        client.stop();
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LogicalPlanConfiguration (com.datatorrent.stram.plan.logical.LogicalPlanConfiguration), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), GetClusterNodesResponse (org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse), TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator), Properties (java.util.Properties), ClientRMService (org.apache.hadoop.yarn.server.resourcemanager.ClientRMService), JarHelper (org.apache.apex.common.util.JarHelper), GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator), LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan), GetClusterNodesRequest (org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport), Test (org.junit.Test)
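
The test talks to ClientRMService directly because it already holds a handle to the mini cluster's ResourceManager. From ordinary client code, the same node listing is usually obtained through YarnClient; a minimal sketch, assuming a ResourceManager reachable via the yarn-site.xml on the classpath:

import java.util.List;

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ListClusterNodes {
    public static void main(String[] args) throws Exception {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(new YarnConfiguration());
        yarnClient.start();
        try {
            // Same information as the GetClusterNodesRequest round trip above,
            // restricted to nodes in the RUNNING state.
            List<NodeReport> reports = yarnClient.getNodeReports(NodeState.RUNNING);
            for (NodeReport nr : reports) {
                System.out.printf("%s capability=%s used=%s containers=%d%n",
                        nr.getNodeId(), nr.getCapability(), nr.getUsed(),
                        nr.getNumContainers());
            }
        } finally {
            yarnClient.stop();
        }
    }
}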

Example 23 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project cdap by caskdata.

Class YarnCheck, method run.

@Override
public void run() {
    int yarnConnectTimeout = cConf.getInt(Constants.Startup.YARN_CONNECT_TIMEOUT_SECONDS, 60);
    LOG.info("Checking YARN availability -- may take up to {} seconds.", yarnConnectTimeout);
    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(hConf);
    List<NodeReport> nodeReports;
    // If YARN is not up, yarnClient.start() will hang, so run it on a worker thread and bound the wait.
    ExecutorService executorService = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("startup-checker").build());
    try {
        Future<List<NodeReport>> result = executorService.submit(new Callable<List<NodeReport>>() {

            @Override
            public List<NodeReport> call() throws Exception {
                yarnClient.start();
                return yarnClient.getNodeReports();
            }
        });
        nodeReports = result.get(yarnConnectTimeout, TimeUnit.SECONDS);
        LOG.info("  YARN availability successfully verified.");
    } catch (Exception e) {
        throw new RuntimeException("Unable to get status of YARN nodemanagers. " + "Please check that YARN is running " + "and that the correct Hadoop configuration (core-site.xml, yarn-site.xml) and libraries " + "are included in the CDAP master classpath.", e);
    } finally {
        try {
            yarnClient.stop();
        } catch (Exception e) {
            LOG.warn("Error stopping yarn client.", e);
        } finally {
            executorService.shutdown();
        }
    }
    checkResources(nodeReports);
}
Also used: ExecutorService (java.util.concurrent.ExecutorService), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), List (java.util.List), YarnClient (org.apache.hadoop.yarn.client.api.YarnClient), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport)
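
The notable idiom here is the guard around yarnClient.start(): since the call can block indefinitely when YARN is down, it runs on a single-thread executor and the wait is bounded with Future.get(timeout, unit). A generic sketch of that pattern, with hypothetical names:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public final class TimeoutGuard {
    // Runs a potentially hanging call with a hard timeout, mirroring the
    // executor-plus-Future.get(timeout) pattern in YarnCheck.run() above.
    public static <T> T callWithTimeout(Callable<T> call, long timeoutSeconds)
            throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            Future<T> future = executor.submit(call);
            // Throws TimeoutException if the call is still blocked at the deadline.
            return future.get(timeoutSeconds, TimeUnit.SECONDS);
        } finally {
            executor.shutdownNow(); // interrupt the worker if it never returned
        }
    }
}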

Example 24 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project cdap by caskdata.

Class YarnCheck, method checkResources.

private void checkResources(List<NodeReport> nodeReports) {
    LOG.info("Checking that YARN has enough resources to run all system services.");
    int memoryCapacity = 0;
    int vcoresCapacity = 0;
    int memoryUsed = 0;
    int vcoresUsed = 0;
    int availableNodes = 0;
    for (NodeReport nodeReport : nodeReports) {
        NodeId nodeId = nodeReport.getNodeId();
        LOG.debug("Got report for node {}", nodeId);
        if (!nodeReport.getNodeState().isUnusable()) {
            Resource nodeCapability = nodeReport.getCapability();
            Resource nodeUsed = nodeReport.getUsed();
            // some versions of Hadoop return null here, others do not
            if (nodeCapability != null) {
                LOG.debug("node {} resource capability: memory = {}, vcores = {}", nodeId, nodeCapability.getMemory(), nodeCapability.getVirtualCores());
                memoryCapacity += nodeCapability.getMemory();
                vcoresCapacity += nodeCapability.getVirtualCores();
            }
            if (nodeUsed != null) {
                LOG.debug("node {} resources used: memory = {}, vcores = {}", nodeId, nodeUsed.getMemory(), nodeUsed.getVirtualCores());
                memoryUsed += nodeUsed.getMemory();
                vcoresUsed += nodeUsed.getVirtualCores();
            }
            availableNodes++;
        }
    }
    LOG.debug("YARN resource capacity: {} MB of memory and {} virtual cores.", memoryCapacity, vcoresCapacity);
    LOG.debug("YARN resources used: {} MB of memory and {} virtual cores.", memoryUsed, vcoresUsed);
    // calculate memory and vcores required by CDAP
    int requiredMemoryMB = 0;
    int requiredVCores = 0;
    Set<String> invalidKeys = new HashSet<>();
    for (ServiceResourceKeys serviceResourceKeys : systemServicesResourceKeys) {
        boolean hasConfigError = false;
        int instances = 0;
        int memoryMB = 0;
        int vcores = 0;
        try {
            instances = serviceResourceKeys.getInstances();
        } catch (Exception e) {
            invalidKeys.add(serviceResourceKeys.getInstancesKey());
            hasConfigError = true;
        }
        try {
            memoryMB = serviceResourceKeys.getMemory();
        } catch (Exception e) {
            invalidKeys.add(serviceResourceKeys.getMemoryKey());
            hasConfigError = true;
        }
        try {
            vcores = serviceResourceKeys.getVcores();
        } catch (Exception e) {
            invalidKeys.add(serviceResourceKeys.getVcoresKey());
            hasConfigError = true;
        }
        if (!hasConfigError) {
            LOG.debug("Resource settings for system service {}: {}={}, {}={}, {}={}", serviceResourceKeys.getServiceName(), serviceResourceKeys.getInstancesKey(), instances, serviceResourceKeys.getMemoryKey(), memoryMB, serviceResourceKeys.getVcoresKey(), vcores);
            requiredMemoryMB += memoryMB * instances;
            requiredVCores += vcores * instances;
        }
    }
    if (!invalidKeys.isEmpty()) {
        throw new RuntimeException("YARN resources check failed to invalid config settings for keys: " + Joiner.on(',').join(invalidKeys));
    }
    LOG.debug("{} MB of memory and {} virtual cores are required.", requiredMemoryMB, requiredVCores);
    checkResources(requiredMemoryMB, requiredVCores, memoryCapacity, vcoresCapacity, "in capacity");
    int availableMemoryMB = memoryCapacity - memoryUsed;
    int availableVCores = vcoresCapacity - vcoresUsed;
    try {
        checkResources(requiredMemoryMB, requiredVCores, availableMemoryMB, availableVCores, "available");
    } catch (Exception e) {
        LOG.warn(e.getMessage());
    }
    LOG.info("  YARN resources successfully verified.");
}
Also used: NodeId (org.apache.hadoop.yarn.api.records.NodeId), Resource (org.apache.hadoop.yarn.api.records.Resource), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport), HashSet (java.util.HashSet)
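
One portability note: the capacity loop sums Resource.getMemory(), an int-valued accessor that is deprecated on Hadoop 2.8+ in favor of the long-valued getMemorySize(). A sketch of the same accumulation under that assumption (Hadoop 2.8 or later on the classpath; the class and method names are hypothetical):

import java.util.List;

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Resource;

final class ClusterCapacity {
    // Sums schedulable memory across usable nodes. Using a long avoids int
    // overflow on very large clusters.
    static long totalMemoryMB(List<NodeReport> nodeReports) {
        long totalMB = 0;
        for (NodeReport nodeReport : nodeReports) {
            if (nodeReport.getNodeState().isUnusable()) {
                continue; // skip UNHEALTHY / DECOMMISSIONED / LOST nodes
            }
            Resource capability = nodeReport.getCapability();
            if (capability != null) { // some Hadoop versions return null here
                totalMB += capability.getMemorySize();
            }
        }
        return totalMB;
    }
}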

Example 25 with NodeReport

Use of org.apache.hadoop.yarn.api.records.NodeReport in project incubator-systemml by apache.

Class YarnClusterAnalyzer, method getClusterUtilization.

public static double getClusterUtilization() throws IOException {
    double util = 0;
    try {
        if (_client == null)
            _client = createYarnClient();
        List<NodeReport> nodesReport = _client.getNodeReports();
        double maxMem = 0;
        double currMem = 0;
        long maxCores = 0;
        long currCores = 0;
        for (NodeReport node : nodesReport) {
            Resource max = node.getCapability();
            Resource used = node.getUsed();
            maxMem += max.getMemory();
            currMem += used.getMemory();
            maxCores += max.getVirtualCores();
            currCores += used.getVirtualCores();
        }
        // overall utilization = max of memory utilization and vcore utilization
        util = Math.max(
                Math.min(1, currMem / maxMem), // memory util
                Math.min(1, (double) currCores / maxCores)); // vcore util
    } catch (Exception ex) {
        throw new IOException(ex);
    }
    return util;
}
Also used: Resource (org.apache.hadoop.yarn.api.records.Resource), IOException (java.io.IOException), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport), YarnException (org.apache.hadoop.yarn.exceptions.YarnException)
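
One edge case worth guarding in code derived from this example: if the cluster reports zero capacity (for instance, no live NodeManagers), currMem / maxMem is 0.0 / 0.0, which is NaN and propagates through Math.min and Math.max. A defensive variant of the formula, as a sketch (the method name is hypothetical):

    // Returns 0 rather than NaN when the cluster reports no capacity.
    static double utilization(double currMem, double maxMem,
                              long currCores, long maxCores) {
        double memUtil = maxMem > 0 ? Math.min(1, currMem / maxMem) : 0;
        double coreUtil = maxCores > 0
                ? Math.min(1, (double) currCores / maxCores) : 0;
        return Math.max(memUtil, coreUtil);
    }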

Aggregations

NodeReport (org.apache.hadoop.yarn.api.records.NodeReport): 49 usages
Test (org.junit.Test): 18 usages
ArrayList (java.util.ArrayList): 17 usages
Resource (org.apache.hadoop.yarn.api.records.Resource): 14 usages
GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator): 10 usages
LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan): 10 usages
ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest): 9 usages
MemoryStorageAgent (com.datatorrent.stram.support.StramTestSupport.MemoryStorageAgent): 9 usages
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 9 usages
File (java.io.File): 8 usages
NodeState (org.apache.hadoop.yarn.api.records.NodeState): 7 usages
HashMap (java.util.HashMap): 6 usages
HashSet (java.util.HashSet): 6 usages
Priority (org.apache.hadoop.yarn.api.records.Priority): 6 usages
IOException (java.io.IOException): 5 usages
Map (java.util.Map): 5 usages
Container (org.apache.hadoop.yarn.api.records.Container): 5 usages
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 5 usages
PrintWriter (java.io.PrintWriter): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages