Use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in the Apache SystemML project.
Class DMLAppMaster, method runApplicationMaster.
/**
 * Runs the SystemML application master: registers with the YARN
 * ResourceManager, starts the status reporter, executes the DML script via
 * {@code DMLScript.executeScript}, and finally stops the reporter and
 * unregisters with the final application status.
 *
 * <p>The final status defaults to {@code FAILED} and is only promoted to
 * {@code SUCCEEDED} when script execution reports success, so that any
 * abnormal exit (including an uncaught {@code Error} or an argument parsing
 * failure) is reported to the ResourceManager as a failure rather than with
 * a {@code null} status.
 *
 * @param args command line arguments; generic Hadoop options are consumed by
 *             {@code GenericOptionsParser}, the remainder is passed to the script
 * @throws YarnException if communication with the ResourceManager fails
 * @throws IOException   if communication with the ResourceManager or argument
 *                       parsing fails
 */
public void runApplicationMaster(String[] args) throws YarnException, IOException {
    _conf = new YarnConfiguration();

    // obtain application ID from the container ID found in the environment
    String containerIdString = System.getenv(Environment.CONTAINER_ID.name());
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    _appId = containerId.getApplicationAttemptId().getApplicationId();
    LOG.info("SystemML appplication master (applicationID: " + _appId + ")");

    // initialize clients to ResourceManager
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(_conf);
    rmClient.start();

    // register with ResourceManager (host, port, tracking URL left empty)
    rmClient.registerApplicationMaster("", 0, "");
    LOG.debug("Registered the SystemML application master with resource manager");

    // start status reporter to ResourceManager
    // NOTE(review): 10000 is presumably the report interval in ms - confirm
    DMLAppMasterStatusReporter reporter = new DMLAppMasterStatusReporter(rmClient, 10000);
    reporter.start();
    LOG.debug("Started status reporter (heartbeat to resource manager)");

    // Pessimistic default: anything that leaves the try block without an
    // explicit success is reported as a failure.
    FinalApplicationStatus status = FinalApplicationStatus.FAILED;
    try {
        // set DMLscript app master context
        DMLScript.setActiveAM();

        // parse input arguments; kept inside the outer try so that a parsing
        // failure still stops the reporter and unregisters the AM, while the
        // exception itself keeps propagating to the caller as before
        String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();

        // run SystemML CP
        try {
            // core dml script execution (equivalent to non-AM runtime)
            if (DMLScript.executeScript(_conf, otherArgs)) {
                status = FinalApplicationStatus.SUCCEEDED;
            }
        } catch (DMLScriptException ex) {
            LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script due to stop call:\n\t" + ex.getMessage());
            writeMessageToHDFSWorkingDir(ex.getMessage());
        } catch (Exception ex) {
            LOG.error(DMLYarnClient.APPMASTER_NAME + ": Failed to executed DML script.", ex);
        }
    } finally {
        // stop periodic status reports
        reporter.stopStatusReporter();
        LOG.debug("Stopped status reporter");

        // unregister resource manager client
        rmClient.unregisterApplicationMaster(status, "", "");
        LOG.debug("Unregistered the SystemML application master");
    }
}
Use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in the Apache apex-core project.
Class ResourceRequestHandler, method createContainerRequest.
/**
 * Setup the request that will be sent to the RM for the container ask.
 *
 * <p>The requested capability carries both the memory and the virtual core
 * requirement of the container. When {@code getHost} yields a concrete host,
 * the request is pinned to that node and created with the trailing
 * {@code false} flag (locality relaxation disabled); otherwise the request
 * carries no node or rack constraint.
 *
 * @param csr   the container start request to translate
 * @param first forwarded unchanged to {@code getHost}
 * @return the container request, or {@code null} when {@code getHost}
 *         reports {@code INVALID_HOST}
 */
public ContainerRequest createContainerRequest(ContainerStartRequest csr, boolean first) {
    // check for node locality constraint
    String host = getHost(csr, first);
    // Compare by value rather than by reference: the sentinel must be
    // recognized even if the returned String is not the very same instance.
    if (INVALID_HOST.equals(host)) {
        return null;
    }

    int priority = csr.container.getResourceRequestPriority();
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(csr.container.getRequiredMemoryMB());
    capability.setVirtualCores(csr.container.getRequiredVCores());

    String[] nodes = null;
    // Rack constraints are intentionally left unset (the nodeToRack lookup
    // that used to populate them is disabled).
    String[] racks = null;

    if (host != null) {
        nodes = new String[] { host };
        return new ContainerRequest(capability, nodes, racks, Priority.newInstance(priority), false);
    }
    // No host constraint: request the capability anywhere in the cluster
    return new ContainerRequest(capability, nodes, racks, Priority.newInstance(priority));
}
Use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in the Apache apex-core project.
Class BlacklistBasedResourceRequestHandler, method reissueContainerRequests.
/**
 * Fills {@code containerRequests} with the requests to issue in this round.
 *
 * <p>Timed-out entries of {@code requestedResources} are first re-created via
 * {@code recreateContainerRequest}. Then, as long as host specific requests
 * are pending, one host's batch is issued per call: every node other than
 * that host is blacklisted and the batch is re-issued without node or rack
 * constraints. Once no host specific requests remain, the blacklist set up
 * for them is removed again and all other pending requests are issued.
 */
@Override
public void reissueContainerRequests(AMRMClient<ContainerRequest> amRmClient, Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources, int loopCounter, ResourceRequestHandler resourceRequestor, List<ContainerRequest> containerRequests, List<ContainerRequest> removedContainerRequests) {
    // Check if any requests timed out, create new requests in that case
    boolean hasOutstandingRequests = !requestedResources.isEmpty();
    if (hasOutstandingRequests) {
        recreateContainerRequest(requestedResources, loopCounter, resourceRequestor, removedContainerRequests);
    }

    if (hostSpecificRequestsMap.isEmpty()) {
        // Host specific requests are done: lift the blacklist they required
        if (blacklistedNodesForHostSpecificRequests != null) {
            LOG.debug("All requests done.. Removing nodes from blacklist {}", blacklistedNodesForHostSpecificRequests);
            amRmClient.updateBlacklist(null, blacklistedNodesForHostSpecificRequests);
            blacklistedNodesForHostSpecificRequests = null;
        }
        if (otherContainerRequests.isEmpty()) {
            return;
        }
        // Issue every remaining request that is not bound to a host
        for (Entry<ContainerRequest, ContainerStartRequest> pending : otherContainerRequests.entrySet()) {
            ContainerRequest request = pending.getKey();
            ContainerStartRequest startRequest = pending.getValue();
            MutablePair<Integer, ContainerRequest> issued = new MutablePair<>(loopCounter, request);
            requestedResources.put(startRequest, issued);
            containerRequests.add(request);
        }
        otherContainerRequests.clear();
        return;
    }

    // Host specific requests take precedence over everything else
    LOG.info("Issue Host specific requests first");
    Entry<String, List<ContainerRequest>> hostBatch = hostSpecificRequestsMap.entrySet().iterator().next();
    List<ContainerRequest> batchRequests = hostBatch.getValue();
    ContainerRequest firstRequest = batchRequests.get(0);

    // Blacklist every node except the target so the allocation lands there
    List<String> nodesToBlacklist = resourceRequestor.getNodesExceptHost(firstRequest.getNodes());
    amRmClient.updateBlacklist(nodesToBlacklist, firstRequest.getNodes());
    blacklistedNodesForHostSpecificRequests = nodesToBlacklist;
    LOG.info("Sending {} request(s) after blacklisting all nodes other than {}", batchRequests.size(), firstRequest.getNodes());

    for (ContainerRequest hostRequest : batchRequests) {
        ContainerStartRequest startRequest = hostSpecificRequests.get(hostRequest);
        // Same capability and priority, but without node/rack constraints
        ContainerRequest unconstrained = new ContainerRequest(hostRequest.getCapability(), null, null, hostRequest.getPriority());
        MutablePair<Integer, ContainerRequest> issued = new MutablePair<>(loopCounter, unconstrained);
        requestedResources.put(startRequest, issued);
        containerRequests.add(unconstrained);
        hostSpecificRequests.remove(hostRequest);
    }
    hostSpecificRequestsMap.remove(hostBatch.getKey());
}
Use of org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest in the Apache apex-core project.
Class BlacklistBasedResourceRequestHandler, method recreateContainerRequest.
/**
 * Re-creates the container request of every entry in
 * {@code requestedResources} whose recorded loop counter lags
 * {@code loopCounter} by more than {@code NUMBER_MISSED_HEARTBEATS}.
 *
 * <p>For each such entry the previously issued request is appended to
 * {@code removedContainerRequests} and the replacement is queued either as a
 * host specific request (when it names nodes) or in
 * {@code otherContainerRequests}.
 *
 * <p>{@code createContainerRequest} may return {@code null}; in that case no
 * replacement exists, so the entry is skipped and the previously issued
 * request is left untouched instead of dereferencing {@code null}.
 *
 * @param requestedResources       issued requests keyed by container start
 *                                 request, paired with the loop counter at issue time
 * @param loopCounter              current loop counter
 * @param resourceRequestor        factory for the replacement requests
 * @param removedContainerRequests receives the requests being replaced
 */
private void recreateContainerRequest(Map<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> requestedResources, int loopCounter, ResourceRequestHandler resourceRequestor, List<ContainerRequest> removedContainerRequests) {
    for (Map.Entry<StreamingContainerAgent.ContainerStartRequest, MutablePair<Integer, ContainerRequest>> entry : requestedResources.entrySet()) {
        MutablePair<Integer, ContainerRequest> issued = entry.getValue();
        if ((loopCounter - issued.getKey()) <= NUMBER_MISSED_HEARTBEATS) {
            // still within the allowed number of missed heartbeats
            continue;
        }

        StreamingContainerAgent.ContainerStartRequest csr = entry.getKey();
        ContainerRequest cr = resourceRequestor.createContainerRequest(csr, false);
        if (cr == null) {
            // No replacement could be built; keep the existing request
            // rather than failing with a NullPointerException.
            LOG.warn("Could not re-create container request for {}; keeping the existing request", csr);
            continue;
        }

        removedContainerRequests.add(issued.getRight());
        if (cr.getNodes() != null && !cr.getNodes().isEmpty()) {
            addHostSpecificRequest(csr, cr);
        } else {
            otherContainerRequests.put(cr, csr);
        }
    }
}
Aggregations