Search in sources :

Example 56 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project systemml by apache.

the class DMLAppMaster method runApplicationMaster.

/**
 * Runs the SystemML application master inside a YARN container.
 *
 * <p>The method derives the application ID from the container environment,
 * registers with the resource manager, starts the periodic status reporter,
 * executes the DML script and finally unregisters with the resulting
 * application status.
 *
 * @param args command-line arguments; they are passed through
 *             {@link GenericOptionsParser} and the remaining arguments are
 *             handed to the script execution
 * @throws YarnException if the communication with the resource manager fails
 * @throws IOException if the communication with the resource manager fails
 */
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();
    // obtain application ID from the container this AM is running in
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");
    // initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();
    // register with ResourceManager
    // (host, port, tracking URL are left empty)
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");
    // start status reporter to ResourceManager
    // NOTE(review): 10000 is presumably the heartbeat interval in milliseconds - confirm
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");
    // set DMLscript app master context
    DMLScript.setActiveAM();
    // parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();
    // run SystemML CP
    // Default to FAILED: if the script dies with a Throwable that none of the
    // catch clauses handles (e.g. an Error), the finally block must still
    // unregister with a non-null status.
    FinalApplicationStatus status = FinalApplicationStatus.FAILED;
    try {
        // core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);
        status = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        // hand the stop message over via the HDFS working directory
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        try {
            // stop periodic status reports
            reporter.stopStatusReporter();
            LOG.debug("Stopped status reporter");
        } finally {
            // unregister resource manager client, even if stopping the
            // reporter failed
            rmClient.unregisterApplicationMaster(status, "", "");
            LOG.debug("Unregistered the SystemML application master");
        }
    }
}
Also used : FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 57 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project apex-core by apache.

the class ResourceRequestHandler method createContainerRequest.

/**
 * Setup the request(s) that will be sent to the RM for the container ask.
 *
 * <p>The requested capability carries both the memory and the virtual cores
 * required by the container. When a host is resolved for the container, the
 * request is pinned to that host with locality relaxation disabled;
 * otherwise an unconstrained request is created.
 *
 * @param csr the container start request to translate
 * @param first forwarded to {@code getHost}
 * @return the container request, or {@code null} when {@code getHost}
 *         reports {@code INVALID_HOST}; callers must handle the null case
 */
public ContainerRequest createContainerRequest(ContainerStartRequest csr, boolean first) {
    int priority = csr.container.getResourceRequestPriority();
    String host = getHost(csr, first);
    // Recognize the sentinel by value as well as by identity, so that an equal
    // but distinct String instance is not mistaken for a real host name.
    if (host == INVALID_HOST || (host != null && host.equals(INVALID_HOST))) {
        return null;
    }
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(csr.container.getRequiredMemoryMB());
    capability.setVirtualCores(csr.container.getRequiredVCores());
    // Rack constraints are never set; the lookup via nodeToRack is disabled.
    String[] racks = null;
    if (host != null) {
        // node locality constraint: ask for exactly this host (relaxLocality = false)
        String[] nodes = new String[] { host };
        return new ContainerRequest(capability, nodes, racks, Priority.newInstance(priority), false);
    }
    // no locality constraint
    return new ContainerRequest(capability, null, racks, Priority.newInstance(priority));
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)

Example 58 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project apex-core by apache.

the class BlacklistBasedResourceRequestHandler method reissueContainerRequests.

/**
 * Re-issues container requests, serving host specific requests before all
 * others.
 *
 * <p>Timed-out entries of {@code requestedResources} are first recreated.
 * While host specific requests are pending, one host is handled per call:
 * every node except that host is blacklisted and the requests for the host
 * are sent without an explicit node constraint. Once no host specific
 * requests remain, the blacklist entries added for them are removed and the
 * remaining requests are issued.
 *
 * @param amRmClient client used to update the blacklist
 * @param requestedResources outstanding requests keyed by start request; updated in place
 * @param loopCounter current loop counter, recorded with each issued request
 * @param resourceRequestor handler used to create requests and to look up nodes
 * @param containerRequests receives the requests to be sent
 * @param removedContainerRequests receives the requests to be withdrawn
 */
@Override
public void reissueContainerRequests(AMRMClient<ContainerRequest> amRmClient, Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources, int loopCounter, ResourceRequestHandler resourceRequestor, List<ContainerRequest> containerRequests, List<ContainerRequest> removedContainerRequests) {
    // Check if any requests timed out, create new requests in that case
    if (!requestedResources.isEmpty()) {
        recreateContainerRequest(requestedResources, loopCounter, resourceRequestor, removedContainerRequests);
    }

    if (hostSpecificRequestsMap.isEmpty()) {
        // Host specific requests are done: lift the blacklist that was set up for them
        if (blacklistedNodesForHostSpecificRequests != null) {
            LOG.debug("All requests done.. Removing nodes from blacklist {}", blacklistedNodesForHostSpecificRequests);
            amRmClient.updateBlacklist(null, blacklistedNodesForHostSpecificRequests);
            blacklistedNodesForHostSpecificRequests = null;
        }
        // Proceed with the requests that are not bound to a host
        if (!otherContainerRequests.isEmpty()) {
            for (Entry<ContainerRequest, ContainerStartRequest> pending : otherContainerRequests.entrySet()) {
                ContainerRequest request = pending.getKey();
                requestedResources.put(pending.getValue(), new MutablePair<>(loopCounter, request));
                containerRequests.add(request);
            }
            otherContainerRequests.clear();
        }
        return;
    }

    // Issue all host specific requests first, one host per invocation
    LOG.info("Issue Host specific requests first");
    Entry<String, List<ContainerRequest>> hostEntry = hostSpecificRequestsMap.entrySet().iterator().next();
    List<ContainerRequest> hostRequests = hostEntry.getValue();
    List<String> targetNodes = hostRequests.get(0).getNodes();
    // Blacklist every node but the target host, and make sure the target itself is not blacklisted
    List<String> nodesToBlacklist = resourceRequestor.getNodesExceptHost(targetNodes);
    amRmClient.updateBlacklist(nodesToBlacklist, targetNodes);
    blacklistedNodesForHostSpecificRequests = nodesToBlacklist;
    LOG.info("Sending {} request(s) after blacklisting all nodes other than {}", hostRequests.size(), targetNodes);
    for (ContainerRequest hostRequest : hostRequests) {
        ContainerStartRequest startRequest = hostSpecificRequests.get(hostRequest);
        // The request is sent without node or rack constraints; the blacklist restricts placement
        ContainerRequest unconstrained = new ContainerRequest(hostRequest.getCapability(), null, null, hostRequest.getPriority());
        requestedResources.put(startRequest, new MutablePair<>(loopCounter, unconstrained));
        containerRequests.add(unconstrained);
        hostSpecificRequests.remove(hostRequest);
    }
    hostSpecificRequestsMap.remove(hostEntry.getKey());
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) ContainerStartRequest(com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest) List(java.util.List) ArrayList(java.util.ArrayList) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)

Example 59 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project apex-core by apache.

the class BlacklistBasedResourceRequestHandler method recreateContainerRequest.

/**
 * Creates replacement requests for outstanding requests that have timed out.
 *
 * <p>An entry has timed out when more than {@code NUMBER_MISSED_HEARTBEATS}
 * loop iterations have passed since it was issued. For such an entry the old
 * request is queued for removal and a new request is created, which is filed
 * either as host specific request or under the other requests depending on
 * whether it names any nodes.
 *
 * @param requestedResources outstanding requests together with the loop counter at which they were issued
 * @param loopCounter current loop counter
 * @param resourceRequestor handler used to create the replacement request
 * @param removedContainerRequests receives the timed-out requests to be withdrawn
 */
private void recreateContainerRequest(Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources, int loopCounter, ResourceRequestHandler resourceRequestor, List<ContainerRequest> removedContainerRequests) {
    for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources.entrySet()) {
        MutablePair<Integer, ContainerRequest> issued = entry.getValue();
        if ((loopCounter - issued.getKey()) <= NUMBER_MISSED_HEARTBEATS) {
            // not timed out yet
            continue;
        }
        StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
        ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
        if (cr == null) {
            // createContainerRequest returns null when no valid host is
            // available. Without a replacement the outstanding request is left
            // untouched, so the entry is examined again on the next call.
            continue;
        }
        removedContainerRequests.add(issued.getRight());
        if (cr.getNodes() != null && !cr.getNodes().isEmpty()) {
            addHostSpecificRequest(csr, cr);
        } else {
            otherContainerRequests.put(cr, csr);
        }
    }
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) ContainerStartRequest(com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)59 Test (org.junit.Test)26 Resource (org.apache.hadoop.yarn.api.records.Resource)16 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)15 Configuration (org.apache.hadoop.conf.Configuration)14 Container (org.apache.hadoop.yarn.api.records.Container)12 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)11 IOException (java.io.IOException)10 Priority (org.apache.hadoop.yarn.api.records.Priority)10 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)8 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)8 UpdatedContainer (org.apache.hadoop.yarn.api.records.UpdatedContainer)6 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)5 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)5 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)5 ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest)4 Map (java.util.Map)4 TreeSet (java.util.TreeSet)4 MutablePair (org.apache.commons.lang3.tuple.MutablePair)4