Search in sources :

Example 26 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project alluxio by Alluxio.

In the class ContainerAllocatorTest, method allocateMasterInAnyHost:

/**
 * Stubs the RM client so that a container request with relaxed locality and
 * exactly one node named "any" is answered by {@code allocateFirstHostAnswer},
 * then runs the allocator. The test is bounded by a 1000 ms timeout.
 */
@Test(timeout = 1000)
public void allocateMasterInAnyHost() throws Exception {
    ContainerAllocator containerAllocator = new ContainerAllocator(CONTAINER_NAME, 1, 1, mResource, mYarnClient, mRMClient, "any");
    // Accepts only requests that relax locality and name the single node "any".
    ArgumentMatcher<ContainerRequest> anyHostRequest = new ArgumentMatcher<ContainerRequest>() {

        @Override
        public boolean matches(Object o) {
            ContainerRequest candidate = (ContainerRequest) o;
            return candidate.getRelaxLocality()
                && candidate.getNodes().size() == 1
                && candidate.getNodes().get(0).equals("any");
        }
    };
    doAnswer(allocateFirstHostAnswer(containerAllocator)).when(mRMClient).addContainerRequest(Matchers.argThat(anyHostRequest));
    containerAllocator.allocateContainers();
}
Also used : ArgumentMatcher(org.mockito.ArgumentMatcher) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) Test(org.junit.Test)

Example 27 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project asterixdb by apache.

In the class AsterixApplicationMaster, method hostToRequest:

/**
 * Builds a container request that targets a single, specific host.
 *
 * <p>The given host is resolved through {@link InetAddress#getByName(String)} and the
 * resulting host name is used as the only node in the request. The request is created
 * with priority 0, no rack list, and the final constructor flag set to {@code false}.
 *
 * @param host
 *            The host to request
 * @param cc
 *            Whether or not the host is the CC; selects {@code ccMem} instead of
 *            {@code ncMem} as the requested memory
 * @return A container request that is (hopefully) for the host we asked for.
 * @throws UnknownHostException
 *             if {@code host} cannot be resolved
 */
private ContainerRequest hostToRequest(String host, boolean cc) throws UnknownHostException {
    InetAddress resolved = InetAddress.getByName(host);

    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(0);

    // Only memory is set; nothing else is requested because we don't care about it
    // and yarn doesn't honor it yet.
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(cc ? ccMem : ncMem);

    //TODO this is silly
    String resolvedName = resolved.getHostName();
    String[] hosts = { resolvedName };
    LOG.info("IP addr: " + host + " resolved to " + resolvedName);

    ContainerRequest request = new ContainerRequest(capability, hosts, null, priority, false);
    LOG.info("Requested host ask: " + request.getNodes());
    return request;
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) InetAddress(java.net.InetAddress)

Example 28 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project incubator-systemml by apache.

In the class DMLAppMaster, method runApplicationMaster:

/**
 * Runs the SystemML application master: registers with the ResourceManager, starts a
 * status reporter, executes the DML script, and finally unregisters with the outcome.
 *
 * @param args command-line arguments; generic Hadoop options are consumed by
 *             {@code GenericOptionsParser} and the remainder is handed to the script runner
 * @throws YarnException declared by this method's signature
 * @throws IOException declared by this method's signature
 */
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();

    // Derive the application ID from the container ID found in the environment.
    String rawContainerId = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(rawContainerId);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");

    // Bring up the ResourceManager client.
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();

    // Register with the ResourceManager using empty host/tracking-URL and port 0.
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");

    // Start the periodic status reporter (heartbeat to the ResourceManager).
    DMLAppMasterStatusReporter statusReporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    statusReporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");

    // Mark this process as the active application master for DMLScript.
    DMLScript.setActiveAM();

    // Strip generic Hadoop options; what remains are the script arguments.
    String[] scriptArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();

    FinalApplicationStatus status = null;
    try {
        // Core DML script execution (equivalent to non-AM runtime).
        boolean success = DMLScript.executeScript(_conf, scriptArgs);
        status = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        // Script stopped itself: log, mark failed, and persist the message to HDFS.
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        // Stop heartbeats first, then report the final status to the ResourceManager.
        statusReporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}
Also used : FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) IOException(java.io.IOException) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 29 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project pai by Microsoft.

In the class ApplicationMaster, method addContainerRequest:

/**
 * Builds a ContainerRequest for the given task, submits it to the RM client, and
 * schedules delayed follow-up work for both the "request not available yet" and
 * the "request not satisfied in time" cases.
 *
 * <p>If {@code setupContainerRequest} throws {@link NotAvailableException}, nothing is
 * sent to the RM; instead a delayed task is queued that calls this method again with a
 * deep-copied snapshot of the task status, provided the status manager still contains it.
 *
 * <p>Otherwise the request is added to {@code rmClient} and {@code selectionManager},
 * the task is transitioned to {@code CONTAINER_REQUESTED}, and a delayed task is queued
 * that, if its {@code containsTask} check passes, removes the request, transitions the
 * task to {@code TASK_WAITING}, and calls this method again.
 *
 * @param taskStatus status of the task that needs a container
 * @throws Exception declared by this method's signature
 */
private void addContainerRequest(TaskStatus taskStatus) throws Exception {
    String taskRoleName = taskStatus.getTaskRoleName();
    TaskStatusLocator taskLocator = new TaskStatusLocator(taskRoleName, taskStatus.getTaskIndex());
    String logPrefix = String.format("%s: addContainerRequest: ", taskLocator);
    LOGGER.logInfo(logPrefix + "Start");
    // 1. setupContainerRequest, retry later if the request is not available.
    // The retry interval is taken from CommonUtils.getRandomNumber over the configured min/max.
    Integer setupContainerRequestRetryIntervalSec = CommonUtils.getRandomNumber(conf.getLauncherConfig().getAmSetupContainerRequestMinRetryIntervalSec(), conf.getLauncherConfig().getAmSetupContainerRequestMaxRetryIntervalSec());
    ContainerRequest request;
    try {
        request = setupContainerRequest(taskStatus);
    } catch (NotAvailableException e) {
        LOGGER.logWarning(e, logPrefix + "Failed to setupContainerRequest: " + "ContainerRequest may be temporarily not available. " + "Will retry after %ss.", setupContainerRequestRetryIntervalSec);
        // The delayed retry works on a deep copy rather than the caller's object.
        TaskStatus taskStatusSnapshot = YamlUtils.deepCopy(taskStatus, TaskStatus.class);
        transitionTaskStateQueue.queueSystemTaskDelayed(() -> {
            if (statusManager.containsTask(taskStatusSnapshot)) {
                addContainerRequest(taskStatusSnapshot);
            } else {
                LOGGER.logWarning(logPrefix + "Task not found in Status. Ignore it.");
            }
        // Presumably converts seconds to milliseconds -- confirm the delay unit of queueSystemTaskDelayed.
        }, setupContainerRequestRetryIntervalSec * 1000);
        return;
    }
    // 2. addContainerRequest, retry later if the request times out.
    // The timeout is taken from CommonUtils.getRandomNumber over the configured min/max.
    Integer containerRequestTimeoutSec = CommonUtils.getRandomNumber(conf.getLauncherConfig().getAmContainerRequestMinTimeoutSec(), conf.getLauncherConfig().getAmContainerRequestMaxTimeoutSec());
    LOGGER.logInfo(logPrefix + "Send ContainerRequest to RM with timeout %ss. ContainerRequest: [%s]", containerRequestTimeoutSec, HadoopExts.toString(request));
    rmClient.addContainerRequest(request);
    selectionManager.addContainerRequest(request);
    statusManager.transitionTaskState(taskLocator, TaskState.CONTAINER_REQUESTED, new TaskEvent().setContainerRequest(request));
    // NOTE(review): unlike the retry path above, this delayed task captures the live
    // taskStatus instead of a deep-copied snapshot -- confirm this is intentional.
    transitionTaskStateQueue.queueSystemTaskDelayed(() -> {
        // The check is keyed by the request's priority; presumably the status manager
        // maps priorities to outstanding requests -- verify against StatusManager.
        if (statusManager.containsTask(request.getPriority())) {
            LOGGER.logWarning(logPrefix + "ContainerRequest cannot be satisfied within timeout %ss. " + "Cancel it and Request again. ContainerRequest: [%s]", containerRequestTimeoutSec, HadoopExts.toString(request));
            removeContainerRequest(taskStatus);
            statusManager.transitionTaskState(taskLocator, TaskState.TASK_WAITING);
            addContainerRequest(taskStatus);
        }
    }, containerRequestTimeoutSec * 1000);
}
Also used : ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) NotAvailableException(com.microsoft.frameworklauncher.common.exceptions.NotAvailableException)

Example 30 with ContainerRequest

use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in project pai by Microsoft.

In the class ApplicationMaster, method removeContainerRequest:

/**
 * Withdraws the outstanding ContainerRequest of the given task, if there is one.
 *
 * <p>Does nothing when the status manager does not contain the task or holds no
 * request for it. Removal from {@code rmClient} and from {@code selectionManager}
 * are attempted independently; a failure in either is logged and not rethrown.
 *
 * @param taskStatus status of the task whose request should be removed
 */
private void removeContainerRequest(TaskStatus taskStatus) {
    String roleName = taskStatus.getTaskRoleName();
    TaskStatusLocator taskLocator = new TaskStatusLocator(roleName, taskStatus.getTaskIndex());
    if (statusManager.containsTask(taskLocator)) {
        ContainerRequest pending = statusManager.getContainerRequest(taskLocator);
        if (pending != null) {
            // Best effort: a failure on the RM side must not prevent the local cleanup below.
            try {
                rmClient.removeContainerRequest(pending);
            } catch (Exception e) {
                LOGGER.logError(e, "%s: Failed to rmClient.removeContainerRequest", taskLocator);
            }
            try {
                selectionManager.removeContainerRequest(pending);
            } catch (Exception e) {
                LOGGER.logError(e, "%s: Failed to selectionManager.removeContainerRequest", taskLocator);
            }
        }
    }
}
Also used : ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) NonTransientException(com.microsoft.frameworklauncher.common.exceptions.NonTransientException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) AggregateException(com.microsoft.frameworklauncher.common.exceptions.AggregateException) NotAvailableException(com.microsoft.frameworklauncher.common.exceptions.NotAvailableException)

Aggregations

ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest): 59, Test (org.junit.Test): 26, Resource (org.apache.hadoop.yarn.api.records.Resource): 16, AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse): 15, Configuration (org.apache.hadoop.conf.Configuration): 14, Container (org.apache.hadoop.yarn.api.records.Container): 12, YarnException (org.apache.hadoop.yarn.exceptions.YarnException): 11, IOException (java.io.IOException): 10, Priority (org.apache.hadoop.yarn.api.records.Priority): 10, ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 8, ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 8, UpdatedContainer (org.apache.hadoop.yarn.api.records.UpdatedContainer): 6, ArrayList (java.util.ArrayList): 5, HashMap (java.util.HashMap): 5, UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 5, YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 5, ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest): 4, Map (java.util.Map): 4, TreeSet (java.util.TreeSet): 4, MutablePair (org.apache.commons.lang3.tuple.MutablePair): 4