Use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project incubator-systemml by apache.
The class DMLAppMaster, method runApplicationMaster.
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();

    // obtain application ID
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML application master (applicationID: " + _appId + ")");

    // initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();

    // register with ResourceManager (host, port for rm communication)
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");

    // start status reporter to ResourceManager
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");

    // set DMLScript app master context
    DMLScript.setActiveAM();

    // parse input arguments
    String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();

    // run SystemML CP
    FinalApplicationStatus status = null;
    try {
        // core dml script execution (equivalent to non-AM runtime)
        boolean success = DMLScript.executeScript(_conf, otherArgs);
        if (success)
            status = FinalApplicationStatus.SUCCEEDED;
        else
            status = FinalApplicationStatus.FAILED;
    } catch (DMLScriptException ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to execute DML script due to stop call:\n\t" + ex.getMessage());
        status = FinalApplicationStatus.FAILED;
        writeMessageToHDFSWorkingDir(ex.getMessage());
    } catch (Exception ex) {
        LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to execute DML script.", ex);
        status = FinalApplicationStatus.FAILED;
    } finally {
        // stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");

        // unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}
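As a point of comparison, here is a minimal, hedged sketch of the same register/unregister lifecycle around a synchronous AMRMClient, outside of SystemML. The class name, diagnostics message, and tracking URL are hypothetical placeholders, and the sketch assumes it runs inside an already-launched application master container (otherwise registration with the ResourceManager will fail).

import java.io.IOException;

import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;

public class MinimalAppMaster {

    public static void main(String[] args) throws YarnException, IOException {
        YarnConfiguration conf = new YarnConfiguration();

        // register this application master with the ResourceManager
        AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
        rmClient.init(conf);
        rmClient.start();
        rmClient.registerApplicationMaster("", 0, "");

        FinalApplicationStatus status = FinalApplicationStatus.FAILED;
        try {
            // ... application-specific work would go here ...
            status = FinalApplicationStatus.SUCCEEDED;
        } finally {
            // report the final status together with a diagnostics message and
            // tracking URL (both are hypothetical placeholders)
            rmClient.unregisterApplicationMaster(status, "finished", "http://example.org/tracking");
            rmClient.stop();
        }
    }
}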
Use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project incubator-systemml by apache.
The class DMLYarnClient, method launchDMLYarnAppmaster.
/**
 * Launches the DML yarn app master and executes the given DML script
 * with the given configuration and jar file.
 *
 * NOTE: when launching the yarn app master, we do not explicitly probe whether we
 * are running on a YARN or MR1 cluster. On MR1, loading the class YarnConfiguration
 * already fails with a ClassNotFoundException. On any exception we fall back to
 * running CP directly in the client process.
 *
 * @return true if the DML program executed successfully as a yarn app master
 * @throws IOException if an IOException occurs
 * @throws DMLScriptException if a DMLScriptException occurs
 */
protected boolean launchDMLYarnAppmaster() throws IOException, DMLScriptException {
    boolean ret = false;
    String hdfsWD = null;
    try {
        Timing time = new Timing(true);

        // load yarn configuration
        YarnConfiguration yconf = new YarnConfiguration();

        // create yarn client
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(yconf);
        yarnClient.start();

        // create application and get the ApplicationID
        YarnClientApplication app = yarnClient.createApplication();
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        ApplicationId appId = appContext.getApplicationId();
        LOG.debug("Created application (applicationID: " + appId + ")");

        // prepare hdfs working directory via ApplicationID
        // copy script, config, jar file to hdfs
        hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(_dmlConfig, appId);
        copyResourcesToHdfsWorkingDir(yconf, hdfsWD);

        // construct command line argument
        String command = constructAMCommand(_args, _dmlConfig);
        LOG.debug("Constructed application master command: \n" + command);

        // set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        amContainer.setCommands(Collections.singletonList(command));
        amContainer.setLocalResources(constructLocalResourceMap(yconf));
        amContainer.setEnvironment(constructEnvionmentMap(yconf));

        // set up resource type requirements for ApplicationMaster
        int memHeap = _dmlConfig.getIntValue(DMLConfig.YARN_APPMASTERMEM);
        int memAlloc = (int) computeMemoryAllocation(memHeap);
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(memAlloc);
        capability.setVirtualCores(NUM_CORES);
        LOG.debug("Requested application resources: memory=" + memAlloc + ", vcores=" + NUM_CORES);

        // finally, set up the ApplicationSubmissionContext for the application
        String qname = _dmlConfig.getTextValue(DMLConfig.YARN_APPQUEUE);
        // application name
        appContext.setApplicationName(APPMASTER_NAME);
        appContext.setAMContainerSpec(amContainer);
        appContext.setResource(capability);
        // queue
        appContext.setQueue(qname);
        LOG.debug("Configured application meta data: name=" + APPMASTER_NAME + ", queue=" + qname);

        // submit application (non-blocking)
        yarnClient.submitApplication(appContext);

        // check application status periodically (and output web ui address)
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);
        LOG.info("Application tracking-URL: " + appReport.getTrackingUrl());
        YarnApplicationState appState = appReport.getYarnApplicationState();
        YarnApplicationState oldState = appState;
        LOG.info("Application state: " + appState);
        while (appState != YarnApplicationState.FINISHED
            && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
            // wait for APP_STATE_INTERVAL (200ms)
            Thread.sleep(APP_STATE_INTERVAL);
            appReport = yarnClient.getApplicationReport(appId);
            appState = appReport.getYarnApplicationState();
            if (appState != oldState) {
                oldState = appState;
                LOG.info("Application state: " + appState);
            }
        }

        // check final status (failed or succeeded)
        FinalApplicationStatus finalState = appReport.getFinalApplicationStatus();
        LOG.info("Application final status: " + finalState);

        // show application and total runtime
        double appRuntime = (double) (appReport.getFinishTime() - appReport.getStartTime()) / 1000;
        LOG.info("Application runtime: " + appRuntime + " sec.");
        LOG.info("Total runtime: " + String.format("%.3f", time.stop() / 1000) + " sec.");

        // raise script-level error in case of failed final status
        if (finalState != FinalApplicationStatus.SUCCEEDED) {
            // propagate script-level stop call message
            String stop_msg = readMessageToHDFSWorkingDir(_dmlConfig, yconf, appId);
            if (stop_msg != null)
                throw new DMLScriptException(stop_msg);
            // generic failure message
            throw new DMLRuntimeException("DML yarn app master finished with final status: " + finalState + ".");
        }
        ret = true;
    } catch (DMLScriptException ex) {
        // rethrow DMLScriptException to propagate stop call
        throw ex;
    } catch (Exception ex) {
        LOG.error("Failed to run DML yarn app master.", ex);
        ret = false;
    } finally {
        // cleanup working directory
        if (hdfsWD != null)
            MapReduceTool.deleteFileIfExistOnHDFS(hdfsWD);
    }
    return ret;
}
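The blocking wait for a terminal YarnApplicationState followed by a check of the final status is a recurring pattern; the following is a hedged sketch of factoring it into a small helper. It is not code from either project, and the class name, method name, and 500 ms polling interval are assumptions.

import java.io.IOException;
import java.util.EnumSet;

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;

public final class YarnAppWaiter {

    /** Blocks until the application reaches a terminal state and reports whether it succeeded. */
    static boolean waitForSuccess(YarnClient yarnClient, ApplicationId appId)
            throws YarnException, IOException, InterruptedException {
        EnumSet<YarnApplicationState> terminal = EnumSet.of(
            YarnApplicationState.FINISHED,
            YarnApplicationState.KILLED,
            YarnApplicationState.FAILED);

        ApplicationReport report = yarnClient.getApplicationReport(appId);
        while (!terminal.contains(report.getYarnApplicationState())) {
            // poll the ResourceManager periodically (interval is an assumption)
            Thread.sleep(500);
            report = yarnClient.getApplicationReport(appId);
        }

        // only FINISHED with final status SUCCEEDED counts as success;
        // KILLED and FAILED applications map to false
        return report.getYarnApplicationState() == YarnApplicationState.FINISHED
            && report.getFinalApplicationStatus() == FinalApplicationStatus.SUCCEEDED;
    }
}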
Use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project metron by apache.
The class Client, method monitorApplication.
/**
* Monitor the submitted application for completion.
* Kill application if time expires.
* @param appId Application Id of application to be monitored
* @return true if application completed successfully
* @throws YarnException
* @throws IOException
*/
private boolean monitorApplication(ApplicationId appId) throws YarnException, IOException {
    while (true) {
        // check app status every 1 second
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.debug("Thread sleep in monitoring loop interrupted");
        }

        // get application report for the appId we are interested in
        ApplicationReport report = yarnClient.getApplicationReport(appId);
        LOG.info("Got application report from ASM for"
            + ", appId=" + appId.getId()
            + ", clientToAMToken=" + report.getClientToAMToken()
            + ", appDiagnostics=" + report.getDiagnostics()
            + ", appMasterHost=" + report.getHost()
            + ", appQueue=" + report.getQueue()
            + ", appMasterRpcPort=" + report.getRpcPort()
            + ", appStartTime=" + report.getStartTime()
            + ", yarnAppState=" + report.getYarnApplicationState().toString()
            + ", distributedFinalState=" + report.getFinalApplicationStatus().toString()
            + ", appTrackingUrl=" + report.getTrackingUrl()
            + ", appUser=" + report.getUser());

        YarnApplicationState state = report.getYarnApplicationState();
        FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
        if (YarnApplicationState.RUNNING == state) {
            LOG.info("Application is running...");
            return true;
        }
        if (YarnApplicationState.FINISHED == state) {
            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                LOG.info("Application has completed successfully. Breaking monitoring loop");
                return true;
            } else {
                LOG.info("Application finished unsuccessfully." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
                return false;
            }
        } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
            LOG.info("Application did not finish." + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() + ". Breaking monitoring loop");
            return false;
        }
        if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
            LOG.info("Reached client specified timeout for application. Killing application");
            forceKillApplication(appId);
            return false;
        }
    }
}
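The forceKillApplication method referenced above is not part of this excerpt. A minimal, hypothetical sketch of such a helper, built directly on YarnClient.killApplication (the class and method names here are assumptions, not Metron's actual implementation), could look like this:

import java.io.IOException;

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;

public final class KillHelper {

    /**
     * Asks the ResourceManager to kill the application. Subsequent reports for an
     * application that was still running then show YarnApplicationState.KILLED and
     * a final status of FinalApplicationStatus.KILLED.
     */
    static void forceKill(YarnClient yarnClient, ApplicationId appId)
            throws YarnException, IOException {
        yarnClient.killApplication(appId);
    }
}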
Use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project hadoop by apache.
The class TestRMAppAttemptTransitions, method testFinishingToFinishing.
@Test
public void testFinishingToFinishing() {
    Container amContainer = allocateApplicationAttempt();
    launchApplicationAttempt(amContainer);
    runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    String trackingUrl = "mytrackingurl";
    String diagnostics = "Successful";
    unregisterApplicationAttempt(amContainer, finalStatus, trackingUrl, diagnostics);
    // container must be AM container to move from FINISHING to FINISHED
    NodeId anyNodeId = NodeId.newInstance("host", 1234);
    applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
        applicationAttempt.getAppAttemptId(),
        BuilderUtils.newContainerStatus(
            BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 42),
            ContainerState.COMPLETE, "", 0, amContainer.getResource()),
        anyNodeId));
    testAppAttemptFinishingState(amContainer, finalStatus, trackingUrl, diagnostics);
}
Use of org.apache.hadoop.yarn.api.records.FinalApplicationStatus in project hadoop by apache.
The class TestRMAppAttemptTransitions, method testUnmanagedAMSuccess.
private void testUnmanagedAMSuccess(String url) {
    unmanagedAM = true;
    when(submissionContext.getUnmanagedAM()).thenReturn(true);
    // submit AM and check it goes to LAUNCHED state
    scheduleApplicationAttempt();
    testAppAttemptLaunchedState(null);
    verify(amLivelinessMonitor, times(1)).register(applicationAttempt.getAppAttemptId());
    // launch AM
    runApplicationAttempt(null, "host", 8042, url, true);
    // complete a container
    Container container = mock(Container.class);
    when(container.getNodeId()).thenReturn(NodeId.newInstance("host", 1234));
    application.handle(new RMAppRunningOnNodeEvent(application.getApplicationId(), container.getNodeId()));
    applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
        applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class), container.getNodeId()));
    // complete AM
    String diagnostics = "Successful";
    FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED;
    applicationAttempt.handle(new RMAppAttemptUnregistrationEvent(
        applicationAttempt.getAppAttemptId(), url, finalStatus, diagnostics));
    sendAttemptUpdateSavedEvent(applicationAttempt);
    testAppAttemptFinishedState(null, finalStatus, url, diagnostics, 1, true);
    assertFalse(transferStateFromPreviousAttempt);
}
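Across these usages, only a few values of the enum are exchanged between application master, ResourceManager, and client: SUCCEEDED, FAILED, KILLED, and UNDEFINED while the application has not yet finished. As a hedged illustration (the mapping below is an assumption, not taken from any of the projects above), a client might translate the reported status into a process exit code as follows:

import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;

public final class ExitCodes {

    /** Maps a reported FinalApplicationStatus to a conventional process exit code (assumed mapping). */
    static int toExitCode(FinalApplicationStatus status) {
        switch (status) {
            case SUCCEEDED:
                return 0;   // application completed successfully
            case FAILED:
                return 1;   // application reported failure
            case KILLED:
                return 2;   // application was killed by a user or by timeout handling
            case UNDEFINED:
            default:
                return 3;   // not finished yet, or a status value unknown to this mapping
        }
    }
}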