use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class RMContainerAllocator method getResources.
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
applyConcurrentTaskLimits();
// will be null the first time
Resource headRoom = Resources.clone(getAvailableResources());
AllocateResponse response;
/*
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
* milliseconds before aborting. During this interval, AM will still try
* to contact the RM.
*/
try {
response = makeRemoteRequest();
// Reset retry count if no exception occurred.
retrystartTime = System.currentTimeMillis();
} catch (ApplicationAttemptNotFoundException e) {
// This can happen if the RM has been restarted. If it is in that state,
// this application must clean itself up.
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e);
} catch (ApplicationMasterNotRegisteredException e) {
LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
// RM may have restarted, re-register with RM.
lastResponseID = 0;
register();
addOutstandingRequestOnResync();
return null;
} catch (InvalidLabelResourceRequestException e) {
// If Invalid label exception is received means the requested label doesnt
// have access so killing job in this case.
String diagMsg = "Requested node-label-expression is invalid: " + StringUtils.stringifyException(e);
LOG.info(diagMsg);
JobId jobId = this.getJob().getID();
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
throw e;
} catch (Exception e) {
// re-trying until the retryInterval has expired.
if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Could not contact RM after " + retryInterval + " milliseconds.");
}
// continue to attempt to contact the RM.
throw e;
}
Resource newHeadRoom = getAvailableResources();
List<Container> newContainers = response.getAllocatedContainers();
// Setting NMTokens
if (response.getNMTokens() != null) {
for (NMToken nmToken : response.getNMTokens()) {
NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
}
}
// Setting AMRMToken
if (response.getAMRMToken() != null) {
updateAMRMToken(response.getAMRMToken());
}
List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
// propagate preemption requests
final PreemptionMessage preemptReq = response.getPreemptionMessage();
if (preemptReq != null) {
preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
}
if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
//something changed
recalculateReduceSchedule = true;
if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
LOG.debug("headroom=" + newHeadRoom);
}
}
if (LOG.isDebugEnabled()) {
for (Container cont : newContainers) {
LOG.debug("Received new Container :" + cont);
}
}
//Called on each allocation. Will know about newly blacklisted/added hosts.
computeIgnoreBlacklisting();
handleUpdatedNodes(response);
handleJobPriorityChange(response);
// handle receiving the timeline collector address for this app
String collectorAddr = response.getCollectorAddr();
MRAppMaster.RunningAppContext appContext = (MRAppMaster.RunningAppContext) this.getContext();
if (collectorAddr != null && !collectorAddr.isEmpty() && appContext.getTimelineV2Client() != null) {
appContext.getTimelineV2Client().setTimelineServiceAddress(response.getCollectorAddr());
}
for (ContainerStatus cont : finishedContainers) {
processFinishedContainer(cont);
}
return newContainers;
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestOpportunisticContainerAllocation method testMixedAllocationAndRelease.
@Test(timeout = 60000)
public void testMixedAllocationAndRelease() throws YarnException, IOException {
// setup container request
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
int containersRequestedNode = amClient.getTable(0).get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int containersRequestedRack = amClient.getTable(0).get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int containersRequestedAny = amClient.getTable(0).get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
assertEquals(4, containersRequestedNode);
assertEquals(4, containersRequestedRack);
assertEquals(4, containersRequestedAny);
assertEquals(2, oppContainersRequestedAny);
assertEquals(4, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
containersRequestedNode = amClient.getTable(0).get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
containersRequestedRack = amClient.getTable(0).get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
containersRequestedAny = amClient.getTable(0).get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
assertEquals(2, containersRequestedNode);
assertEquals(2, containersRequestedRack);
assertEquals(2, containersRequestedAny);
assertEquals(1, oppContainersRequestedAny);
assertEquals(4, amClient.ask.size());
assertEquals(0, amClient.release.size());
// RM should allocate container within 2 calls to allocate()
int allocatedContainerCount = 0;
int allocatedOpportContainerCount = 0;
int iterationsLeft = 50;
Set<ContainerId> releases = new TreeSet<>();
amClient.getNMTokenCache().clearCache();
Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
HashMap<String, Token> receivedNMTokens = new HashMap<>();
while (allocatedContainerCount < containersRequestedAny + oppContainersRequestedAny && iterationsLeft-- > 0) {
AllocateResponse allocResponse = amClient.allocate(0.1f);
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
allocatedContainerCount += allocResponse.getAllocatedContainers().size();
for (Container container : allocResponse.getAllocatedContainers()) {
if (container.getExecutionType() == ExecutionType.OPPORTUNISTIC) {
allocatedOpportContainerCount++;
}
ContainerId rejectContainerId = container.getId();
releases.add(rejectContainerId);
}
for (NMToken token : allocResponse.getNMTokens()) {
String nodeID = token.getNodeId().toString();
receivedNMTokens.put(nodeID, token.getToken());
}
if (allocatedContainerCount < containersRequestedAny) {
// sleep to let NM's heartbeat to RM and trigger allocations
sleep(100);
}
}
assertEquals(containersRequestedAny + oppContainersRequestedAny, allocatedContainerCount);
assertEquals(oppContainersRequestedAny, allocatedOpportContainerCount);
for (ContainerId rejectContainerId : releases) {
amClient.releaseAssignedContainer(rejectContainerId);
}
assertEquals(3, amClient.release.size());
assertEquals(0, amClient.ask.size());
// need to tell the AMRMClient that we don't need these resources anymore
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
assertEquals(4, amClient.ask.size());
iterationsLeft = 3;
// do a few iterations to ensure RM is not going to send new containers
while (iterationsLeft-- > 0) {
// inform RM of rejection
AllocateResponse allocResponse = amClient.allocate(0.1f);
// RM did not send new containers because AM does not need any
assertEquals(0, allocResponse.getAllocatedContainers().size());
if (allocResponse.getCompletedContainersStatuses().size() > 0) {
for (ContainerStatus cStatus : allocResponse.getCompletedContainersStatuses()) {
if (releases.contains(cStatus.getContainerId())) {
assertEquals(cStatus.getState(), ContainerState.COMPLETE);
assertEquals(-100, cStatus.getExitStatus());
releases.remove(cStatus.getContainerId());
}
}
}
if (iterationsLeft > 0) {
// sleep to make sure NM's heartbeat
sleep(100);
}
}
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestOpportunisticContainerAllocation method testPromotionFromAcquired.
@Test(timeout = 60000)
public void testPromotionFromAcquired() throws YarnException, IOException {
// setup container request
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
assertEquals(1, oppContainersRequestedAny);
assertEquals(1, amClient.ask.size());
assertEquals(0, amClient.release.size());
// RM should allocate container within 2 calls to allocate()
int allocatedContainerCount = 0;
Map<ContainerId, Container> allocatedOpportContainers = new HashMap<>();
int iterationsLeft = 50;
amClient.getNMTokenCache().clearCache();
Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
HashMap<String, Token> receivedNMTokens = new HashMap<>();
updateMetrics("Before Opp Allocation");
while (allocatedContainerCount < oppContainersRequestedAny && iterationsLeft-- > 0) {
AllocateResponse allocResponse = amClient.allocate(0.1f);
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
allocatedContainerCount += allocResponse.getAllocatedContainers().size();
for (Container container : allocResponse.getAllocatedContainers()) {
if (container.getExecutionType() == ExecutionType.OPPORTUNISTIC) {
allocatedOpportContainers.put(container.getId(), container);
removeCR(container);
}
}
for (NMToken token : allocResponse.getNMTokens()) {
String nodeID = token.getNodeId().toString();
receivedNMTokens.put(nodeID, token.getToken());
}
if (allocatedContainerCount < oppContainersRequestedAny) {
// sleep to let NM's heartbeat to RM and trigger allocations
sleep(100);
}
}
assertEquals(oppContainersRequestedAny, allocatedContainerCount);
assertEquals(oppContainersRequestedAny, allocatedOpportContainers.size());
updateMetrics("After Opp Allocation / Before Promotion");
try {
Container c = allocatedOpportContainers.values().iterator().next();
amClient.requestContainerUpdate(c, UpdateContainerRequest.newInstance(c.getVersion(), c.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC));
Assert.fail("Should throw Exception..");
} catch (IllegalArgumentException e) {
System.out.println("## " + e.getMessage());
Assert.assertTrue(e.getMessage().contains("target should be GUARANTEED and original should be OPPORTUNISTIC"));
}
Container c = allocatedOpportContainers.values().iterator().next();
amClient.requestContainerUpdate(c, UpdateContainerRequest.newInstance(c.getVersion(), c.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED));
iterationsLeft = 120;
Map<ContainerId, UpdatedContainer> updatedContainers = new HashMap<>();
// do a few iterations to ensure RM is not going to send new containers
while (iterationsLeft-- > 0 && updatedContainers.isEmpty()) {
// inform RM of rejection
AllocateResponse allocResponse = amClient.allocate(0.1f);
// RM did not send new containers because AM does not need any
if (allocResponse.getUpdatedContainers() != null) {
for (UpdatedContainer updatedContainer : allocResponse.getUpdatedContainers()) {
System.out.println("Got update..");
updatedContainers.put(updatedContainer.getContainer().getId(), updatedContainer);
}
}
if (iterationsLeft > 0) {
// sleep to make sure NM's heartbeat
sleep(100);
}
}
updateMetrics("After Promotion");
assertEquals(1, updatedContainers.size());
for (ContainerId cId : allocatedOpportContainers.keySet()) {
Container orig = allocatedOpportContainers.get(cId);
UpdatedContainer updatedContainer = updatedContainers.get(cId);
assertNotNull(updatedContainer);
assertEquals(ExecutionType.GUARANTEED, updatedContainer.getContainer().getExecutionType());
assertEquals(orig.getResource(), updatedContainer.getContainer().getResource());
assertEquals(orig.getNodeId(), updatedContainer.getContainer().getNodeId());
assertEquals(orig.getVersion() + 1, updatedContainer.getContainer().getVersion());
}
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.ask.clear();
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class TestDistributedScheduling method testAMRMClient.
/**
* Validates if AMRMClient can be used with Distributed Scheduling turned on.
*
* @throws Exception
*/
@Test(timeout = 120000)
@SuppressWarnings("unchecked")
public void testAMRMClient() throws Exception {
AMRMClientImpl<AMRMClient.ContainerRequest> amClient = null;
try {
Priority priority = Priority.newInstance(1);
Priority priority2 = Priority.newInstance(2);
Resource capability = Resource.newInstance(1024, 1);
List<NodeReport> nodeReports = rmClient.getNodeReports(NodeState.RUNNING);
String node = nodeReports.get(0).getNodeId().getHost();
String rack = nodeReports.get(0).getRackName();
String[] nodes = new String[] { node };
String[] racks = new String[] { rack };
// start am rm client
amClient = new AMRMClientImpl(client);
amClient.init(yarnConf);
amClient.start();
amClient.registerApplicationMaster(NetUtils.getHostname(), 1024, "");
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
RemoteRequestsTable<ContainerRequest> remoteRequestsTable = amClient.getTable(0);
int containersRequestedNode = remoteRequestsTable.get(priority, node, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int containersRequestedRack = remoteRequestsTable.get(priority, rack, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int containersRequestedAny = remoteRequestsTable.get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
int oppContainersRequestedAny = remoteRequestsTable.get(priority2, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, capability).remoteRequest.getNumContainers();
assertEquals(2, containersRequestedNode);
assertEquals(2, containersRequestedRack);
assertEquals(2, containersRequestedAny);
assertEquals(1, oppContainersRequestedAny);
assertEquals(4, amClient.ask.size());
assertEquals(0, amClient.release.size());
// RM should allocate container within 2 calls to allocate()
int allocatedContainerCount = 0;
int iterationsLeft = 10;
Set<ContainerId> releases = new TreeSet<>();
amClient.getNMTokenCache().clearCache();
Assert.assertEquals(0, amClient.getNMTokenCache().numberOfTokensInCache());
HashMap<String, Token> receivedNMTokens = new HashMap<>();
while (allocatedContainerCount < (containersRequestedAny + oppContainersRequestedAny) && iterationsLeft-- > 0) {
AllocateResponse allocResponse = amClient.allocate(0.1f);
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
allocatedContainerCount += allocResponse.getAllocatedContainers().size();
for (Container container : allocResponse.getAllocatedContainers()) {
ContainerId rejectContainerId = container.getId();
releases.add(rejectContainerId);
}
for (NMToken token : allocResponse.getNMTokens()) {
String nodeID = token.getNodeId().toString();
receivedNMTokens.put(nodeID, token.getToken());
}
if (allocatedContainerCount < containersRequestedAny) {
// sleep to let NM's heartbeat to RM and trigger allocations
sleep(100);
}
}
assertEquals(allocatedContainerCount, containersRequestedAny + oppContainersRequestedAny);
for (ContainerId rejectContainerId : releases) {
amClient.releaseAssignedContainer(rejectContainerId);
}
assertEquals(3, amClient.release.size());
assertEquals(0, amClient.ask.size());
// need to tell the AMRMClient that we dont need these resources anymore
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
assertEquals(4, amClient.ask.size());
// test RPC exception handling
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amClient.addContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
final AMRMClient amc = amClient;
ApplicationMasterProtocol realRM = amClient.rmClient;
try {
ApplicationMasterProtocol mockRM = mock(ApplicationMasterProtocol.class);
when(mockRM.allocate(any(AllocateRequest.class))).thenAnswer(new Answer<AllocateResponse>() {
public AllocateResponse answer(InvocationOnMock invocation) throws Exception {
amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks, priority));
amc.removeContainerRequest(new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true)));
throw new Exception();
}
});
amClient.rmClient = mockRM;
amClient.allocate(0.1f);
} catch (Exception ioe) {
} finally {
amClient.rmClient = realRM;
}
assertEquals(3, amClient.release.size());
assertEquals(6, amClient.ask.size());
iterationsLeft = 3;
// do a few iterations to ensure RM is not going send new containers
while (iterationsLeft-- > 0) {
// inform RM of rejection
AllocateResponse allocResponse = amClient.allocate(0.1f);
// RM did not send new containers because AM does not need any
assertEquals(0, allocResponse.getAllocatedContainers().size());
if (allocResponse.getCompletedContainersStatuses().size() > 0) {
for (ContainerStatus cStatus : allocResponse.getCompletedContainersStatuses()) {
if (releases.contains(cStatus.getContainerId())) {
assertEquals(cStatus.getState(), ContainerState.COMPLETE);
assertEquals(-100, cStatus.getExitStatus());
releases.remove(cStatus.getContainerId());
}
}
}
if (iterationsLeft > 0) {
// sleep to make sure NM's heartbeat
sleep(100);
}
}
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
} finally {
if (amClient != null && amClient.getServiceState() == Service.STATE.STARTED) {
amClient.stop();
}
}
}
use of org.apache.hadoop.yarn.api.records.NMToken in project hadoop by apache.
the class ApplicationMasterService method registerApplicationMaster.
@Override
public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws YarnException, IOException {
AMRMTokenIdentifier amrmTokenIdentifier = YarnServerSecurityUtils.authorizeRequest();
ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.getApplicationAttemptId();
ApplicationId appID = applicationAttemptId.getApplicationId();
AllocateResponseLock lock = responseMap.get(applicationAttemptId);
if (lock == null) {
RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService", "Error in registering application master", appID, applicationAttemptId);
throwApplicationDoesNotExistInCacheException(applicationAttemptId);
}
// Allow only one thread in AM to do registerApp at a time.
synchronized (lock) {
AllocateResponse lastResponse = lock.getAllocateResponse();
if (hasApplicationMasterRegistered(applicationAttemptId)) {
String message = "Application Master is already registered : " + appID;
LOG.warn(message);
RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), AuditConstants.REGISTER_AM, "", "ApplicationMasterService", message, appID, applicationAttemptId);
throw new InvalidApplicationMasterRequestException(message);
}
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
RMApp app = this.rmContext.getRMApps().get(appID);
// Setting the response id to 0 to identify if the
// application master is register for the respective attemptid
lastResponse.setResponseId(0);
lock.setAllocateResponse(lastResponse);
LOG.info("AM registration " + applicationAttemptId);
this.rmContext.getDispatcher().getEventHandler().handle(new RMAppAttemptRegistrationEvent(applicationAttemptId, request.getHost(), request.getRpcPort(), request.getTrackingUrl()));
RMAuditLogger.logSuccess(app.getUser(), AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, applicationAttemptId);
// Pick up min/max resource from scheduler...
RegisterApplicationMasterResponse response = recordFactory.newRecordInstance(RegisterApplicationMasterResponse.class);
response.setMaximumResourceCapability(rScheduler.getMaximumResourceCapability(app.getQueue()));
response.setApplicationACLs(app.getRMAppAttempt(applicationAttemptId).getSubmissionContext().getAMContainerSpec().getApplicationACLs());
response.setQueue(app.getQueue());
if (UserGroupInformation.isSecurityEnabled()) {
LOG.info("Setting client token master key");
response.setClientToAMTokenMasterKey(java.nio.ByteBuffer.wrap(rmContext.getClientToAMTokenSecretManager().getMasterKey(applicationAttemptId).getEncoded()));
}
// and corresponding NM tokens.
if (app.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
List<Container> transferredContainers = rScheduler.getTransferredContainers(applicationAttemptId);
if (!transferredContainers.isEmpty()) {
response.setContainersFromPreviousAttempts(transferredContainers);
List<NMToken> nmTokens = new ArrayList<NMToken>();
for (Container container : transferredContainers) {
try {
NMToken token = rmContext.getNMTokenSecretManager().createAndGetNMToken(app.getUser(), applicationAttemptId, container);
if (null != token) {
nmTokens.add(token);
}
} catch (IllegalArgumentException e) {
// will be automatically retried by RMProxy in RPC layer.
if (e.getCause() instanceof UnknownHostException) {
throw (UnknownHostException) e.getCause();
}
}
}
response.setNMTokensFromPreviousAttempts(nmTokens);
LOG.info("Application " + appID + " retrieved " + transferredContainers.size() + " containers from previous" + " attempts and " + nmTokens.size() + " NM tokens.");
}
}
response.setSchedulerResourceTypes(rScheduler.getSchedulingResourceTypes());
return response;
}
}
Aggregations