Search in sources :

Example 16 with FinalApplicationStatus

use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project incubator-systemml by apache.

the class DMLAppMaster method runApplicationMaster.

public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();
    // obtain application ID
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");
    // initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();
    // register with ResourceManager
    // host, port for rm communication
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");
    // start status reporter to ResourceManager
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");
    // set DMLscript app master context
    DMLScript.setActiveAM();
    // parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();
    // run SystemML CP
    FinalApplicationStatus status = null;
    try {
        // core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);
        if (success)
            status = FinalApplicationStatus.SUCCEEDED;
        else
            status = FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        // stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");
        // unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}
Also used : FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) IOException(java.io.IOException) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 17 with FinalApplicationStatus

use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project incubator-systemml by apache.

the class DMLYarnClient method launchDMLYarnAppmaster.

/**
 * Method to launch the dml yarn app master and execute the given dml script
 * with the given configuration and jar file.
 *
 * NOTE: on launching the yarn app master, we do not explicitly probe if we
 *	  are running on a yarn or MR1 cluster. In case of MR1, already the class
 *	  YarnConfiguration will not be found and raise a classnotfound. In case of any
 *	  exception we fall back to run CP directly in the client process.
 *
 * @return true if dml program successfully executed as yarn app master
 * @throws IOException if IOException occurs
 * @throws DMLScriptException if DMLScriptException occurs
 */
protected boolean launchDMLYarnAppmaster() throws IOException, DMLScriptException {
    boolean ret = false;
    String hdfsWD = null;
    try {
        Timing time = new Timing(true);
        // load yarn configuration
        YarnConfiguration yconf = new YarnConfiguration();
        // create yarn client
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(yconf);
        yarnClient.start();
        // create application and get the ApplicationID
        YarnClientApplication app = yarnClient.createApplication();
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        ApplicationId appId = appContext.getApplicationId();
        LOG.debug("Created application (applicationID: " + appId + ")");
        // prepare hdfs working directory via ApplicationID
        // copy script, config, jar file to hdfs
        hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(_dmlConfig, appId);
        copyResourcesToHdfsWorkingDir(yconf, hdfsWD);
        // construct command line argument
        String command = constructAMCommand(_args, _dmlConfig);
        LOG.debug("Constructed application master command: \n" + command);
        // set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        amContainer.setCommands(Collections.singletonList(command));
        amContainer.setLocalResources(constructLocalResourceMap(yconf));
        amContainer.setEnvironment(constructEnvionmentMap(yconf));
        // Set up resource type requirements for ApplicationMaster
        int memHeap = _dmlConfig.getIntValue(DMLConfig.YARN_APPMASTERMEM);
        int memAlloc = (int) computeMemoryAllocation(memHeap);
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(memAlloc);
        capability.setVirtualCores(NUM_CORES);
        LOG.debug("Requested application resources: memory=" + memAlloc + ", vcores=" + NUM_CORES);
        // Finally, set-up ApplicationSubmissionContext for the application
        String qname = _dmlConfig.getTextValue(DMLConfig.YARN_APPQUEUE);
        // application name
        appContext.setApplicationName(APPMASTER_NAME);
        appContext.setAMContainerSpec(amContainer);
        appContext.setResource(capability);
        // queue
        appContext.setQueue(qname);
        LOG.debug("Configured application meta data: name=" + APPMASTER_NAME + ", queue=" + qname);
        // submit application (non-blocking)
        yarnClient.submitApplication(appContext);
        // Check application status periodically (and output web ui address)
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);
        LOG.info("Application tracking-URL: " + appReport.getTrackingUrl());
        YarnApplicationState appState = appReport.getYarnApplicationState();
        YarnApplicationState oldState = appState;
        LOG.info("Application state: " + appState);
        while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED && appState != YarnApplicationState.FAILED) {
            // wait for 200ms
            Thread.sleep(APP_STATE_INTERVAL);
            appReport = yarnClient.getApplicationReport(appId);
            appState = appReport.getYarnApplicationState();
            if (appState != oldState) {
                oldState = appState;
                LOG.info("Application state: " + appState);
            }
        }
        // check final status (failed or succeeded)
        FinalApplicationStatus finalState = appReport.getFinalApplicationStatus();
        LOG.info("Application final status: " + finalState);
        // show application and total runtime
        double appRuntime = (double) (appReport.getFinishTime() - appReport.getStartTime()) / 1000;
        LOG.info("Application runtime: " + appRuntime + " sec.");
        LOG.info("Total runtime: " + String.format("%.3f", time.stop() / 1000) + " sec.");
        // raised script-level error in case of failed final status
        if (finalState != FinalApplicationStatus.SUCCEEDED) {
            // propagate script-level stop call message
            String stop_msg = readMessageToHDFSWorkingDir(_dmlConfig, yconf, appId);
            if (stop_msg != null)
                throw new DMLScriptException(stop_msg);
            // generic failure message
            throw new DMLRuntimeException("DML yarn app master finished with final status: " + finalState + ".");
        }
        ret = true;
    } catch (DMLScriptException ex) {
        // rethrow DMLScriptException to propagate stop call
        throw ex;
    } catch (Exception ex) {
        LOG.error("Failed to run DML yarn app master.", ex);
        ret = false;
    } finally {
        // cleanup working directory
        if (hdfsWD != null)
            MapReduceTool.deleteFileIfExistOnHDFS(hdfsWD);
    }
    return ret;
}
Also used : YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) DMLScriptException(org.apache.sysml.runtime.DMLScriptException) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 18 with FinalApplicationStatus

use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project metron by apache.

the class Client method monitorApplication.

/**
 * Monitor the submitted application for completion.
 * Kill application if time expires.
 * @param appId Application Id of application to be monitored
 * @return true if application completed successfully
 * @throws YarnException
 * @throws IOException
 */
private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {
    while (true) {
        // Check app status every 1 second.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }
        // Get application report for the appId we are interested in
        ApplicationReport report = yarnClient.getApplicationReport(appId);
        LOG.info("Got application report from ASM for" + ", appId=" + appId.getId() + ", clientToAMToken=" + report.getClientToAMToken() + ", appDiagnostics=" + report.getDiagnostics() + ", appMasterHost=" + report.getHost() + ", appQueue=" + report.getQueue() + ", appMasterRpcPort=" + report.getRpcPort() + ", appStartTime=" + report.getStartTime() + ", yarnAppState=" + report.getYarnApplicationState().toString() + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() + ", appTrackingUrl=" + report.getTrackingUrl() + ", appUser=" + report.getUser());
        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
        if (YarnApplicationState.RUNNING == state) {
            LOG.info("Application is running...");
            return true;
        }
        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application did finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }
        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState)

Example 19 with FinalApplicationStatus

use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project hadoop by apache.

the class TestRMAppAttemptTransitions method testFinishingToFinishing.

@Test
public void testFinishingToFinishing() {
    Container amContainer = allocateApplicationAttempt();
    launchApplicationAttempt(amContainer);
    runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    String trackingUrl = "mytrackingurl";
    String diagnostics = "Successful";
    unregisterApplicationAttempt(amContainer, finalStatus, trackingUrl, diagnostics);
    // container must be AM container to move from FINISHING to FINISHED
    NodeId anyNodeId = NodeId.newInstance("host", 1234);
    applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 42), ContainerState.COMPLETE, "", 0, amContainer.getResource()), anyNodeId));
    testAppAttemptFinishingState(amContainer, finalStatus, trackingUrl, diagnostics);
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMAppAttemptContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent) Test(org.junit.Test)

Example 20 with FinalApplicationStatus

use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project hadoop by apache.

the class TestRMAppAttemptTransitions method testUnmanagedAMSuccess.

private void testUnmanagedAMSuccess(String url) {
    unmanagedAM = true;
    when(submissionContext.getUnmanagedAM()).thenReturn(true);
    // submit AM and check it goes to LAUNCHED state
    scheduleApplicationAttempt();
    testAppAttemptLaunchedState(null);
    verify(amLivelinessMonitor, times(1)).register(applicationAttempt.getAppAttemptId());
    // launch AM
    runApplicationAttempt(null, "host", 8042, url, true);
    // complete a container
    Container container = mock(Container.class);
    when(container.getNodeId()).thenReturn(NodeId.newInstance("host", 1234));
    application.handle(new RMAppRunningOnNodeEvent(application.getApplicationId(), container.getNodeId()));
    applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class), container.getNodeId()));
    // complete AM
    String diagnostics = "Successful";
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    applicationAttempt.handle(new RMAppAttemptUnregistrationEvent(applicationAttempt.getAppAttemptId(), url, finalStatus, diagnostics));
    sendAttemptUpdateSavedEvent(applicationAttempt);
    testAppAttemptFinishedState(null, finalStatus, url, diagnostics, 1, true);
    assertFalse(transferStateFromPreviousAttempt);
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) RMAppRunningOnNodeEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent) RMAppAttemptContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent) RMAppAttemptUnregistrationEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent)

Aggregations

FinalApplicationStatus (org.apache.hadoop.yarn.api.records.FinalApplicationStatus)27 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)9 YarnApplicationState (org.apache.hadoop.yarn.api.records.YarnApplicationState)9 IOException (java.io.IOException)8 Container (org.apache.hadoop.yarn.api.records.Container)8 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)7 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)6 Test (org.junit.Test)6 RMAppAttemptContainerFinishedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent)5 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)4 RMAppAttemptUnregistrationEvent (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent)4 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)3 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)3 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)2 NodeId (org.apache.hadoop.yarn.api.records.NodeId)2