use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class RMContainerRequestor method makeRemoteRequest.
protected AllocateResponse makeRemoteRequest() throws YarnException, IOException {
applyRequestLimits();
ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(new ArrayList<String>(blacklistAdditions), new ArrayList<String>(blacklistRemovals));
AllocateRequest allocateRequest = AllocateRequest.newInstance(lastResponseID, super.getApplicationProgress(), new ArrayList<ResourceRequest>(ask), new ArrayList<ContainerId>(release), blacklistRequest);
AllocateResponse allocateResponse = scheduler.allocate(allocateRequest);
lastResponseID = allocateResponse.getResponseId();
availableResources = allocateResponse.getAvailableResources();
lastClusterNmCount = clusterNmCount;
clusterNmCount = allocateResponse.getNumClusterNodes();
int numCompletedContainers = allocateResponse.getCompletedContainersStatuses().size();
if (ask.size() > 0 || release.size() > 0) {
LOG.info("getResources() for " + applicationId + ":" + " ask=" + ask.size() + " release= " + release.size() + " newContainers=" + allocateResponse.getAllocatedContainers().size() + " finishedContainers=" + numCompletedContainers + " resourcelimit=" + availableResources + " knownNMs=" + clusterNmCount);
}
ask.clear();
release.clear();
if (numCompletedContainers > 0) {
// re-send limited requests when a container completes to trigger asking
// for more containers
requestLimitsToUpdate.addAll(requestLimits.keySet());
}
if (blacklistAdditions.size() > 0 || blacklistRemovals.size() > 0) {
LOG.info("Update the blacklist for " + applicationId + ": blacklistAdditions=" + blacklistAdditions.size() + " blacklistRemovals=" + blacklistRemovals.size());
}
blacklistAdditions.clear();
blacklistRemovals.clear();
return allocateResponse;
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class RMContainerAllocator method getResources.
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
applyConcurrentTaskLimits();
// will be null the first time
Resource headRoom = Resources.clone(getAvailableResources());
AllocateResponse response;
/*
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
* milliseconds before aborting. During this interval, AM will still try
* to contact the RM.
*/
try {
response = makeRemoteRequest();
// Reset retry count if no exception occurred.
retrystartTime = System.currentTimeMillis();
} catch (ApplicationAttemptNotFoundException e) {
// This can happen if the RM has been restarted. If it is in that state,
// this application must clean itself up.
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e);
} catch (ApplicationMasterNotRegisteredException e) {
LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
// RM may have restarted, re-register with RM.
lastResponseID = 0;
register();
addOutstandingRequestOnResync();
return null;
} catch (InvalidLabelResourceRequestException e) {
// If Invalid label exception is received means the requested label doesnt
// have access so killing job in this case.
String diagMsg = "Requested node-label-expression is invalid: " + StringUtils.stringifyException(e);
LOG.info(diagMsg);
JobId jobId = this.getJob().getID();
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
throw e;
} catch (Exception e) {
// re-trying until the retryInterval has expired.
if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
throw new RMContainerAllocationException("Could not contact RM after " + retryInterval + " milliseconds.");
}
// continue to attempt to contact the RM.
throw e;
}
Resource newHeadRoom = getAvailableResources();
List<Container> newContainers = response.getAllocatedContainers();
// Setting NMTokens
if (response.getNMTokens() != null) {
for (NMToken nmToken : response.getNMTokens()) {
NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
}
}
// Setting AMRMToken
if (response.getAMRMToken() != null) {
updateAMRMToken(response.getAMRMToken());
}
List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
// propagate preemption requests
final PreemptionMessage preemptReq = response.getPreemptionMessage();
if (preemptReq != null) {
preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
}
if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
//something changed
recalculateReduceSchedule = true;
if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
LOG.debug("headroom=" + newHeadRoom);
}
}
if (LOG.isDebugEnabled()) {
for (Container cont : newContainers) {
LOG.debug("Received new Container :" + cont);
}
}
//Called on each allocation. Will know about newly blacklisted/added hosts.
computeIgnoreBlacklisting();
handleUpdatedNodes(response);
handleJobPriorityChange(response);
// handle receiving the timeline collector address for this app
String collectorAddr = response.getCollectorAddr();
MRAppMaster.RunningAppContext appContext = (MRAppMaster.RunningAppContext) this.getContext();
if (collectorAddr != null && !collectorAddr.isEmpty() && appContext.getTimelineV2Client() != null) {
appContext.getTimelineV2Client().setTimelineServiceAddress(response.getCollectorAddr());
}
for (ContainerStatus cont : finishedContainers) {
processFinishedContainer(cont);
}
return newContainers;
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestAMRMClient method allocateAndStartContainers.
private List<Container> allocateAndStartContainers(final AMRMClient<ContainerRequest> amClient, final NMClient nmClient, int num) throws YarnException, IOException {
// set up allocation requests
for (int i = 0; i < num; ++i) {
amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
}
// send allocation requests
amClient.allocate(0.1f);
// let NM heartbeat to RM and trigger allocations
triggerSchedulingWithNMHeartBeat();
// get allocations
AllocateResponse allocResponse = amClient.allocate(0.1f);
List<Container> containers = allocResponse.getAllocatedContainers();
Assert.assertEquals(num, containers.size());
// build container launch context
Credentials ts = new Credentials();
DataOutputBuffer dob = new DataOutputBuffer();
ts.writeTokenStorageToStream(dob);
ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
// start a process long enough for increase/decrease action to take effect
ContainerLaunchContext clc = BuilderUtils.newContainerLaunchContext(Collections.<String, LocalResource>emptyMap(), new HashMap<String, String>(), Arrays.asList("sleep", "100"), new HashMap<String, ByteBuffer>(), securityTokens, new HashMap<ApplicationAccessType, String>());
// start the containers and make sure they are in RUNNING state
try {
for (int i = 0; i < num; i++) {
Container container = containers.get(i);
nmClient.startContainer(container, clc);
// container status
while (true) {
ContainerStatus status = nmClient.getContainerStatus(container.getId(), container.getNodeId());
if (status.getState() == ContainerState.RUNNING) {
break;
}
sleep(10);
}
}
} catch (YarnException e) {
throw new AssertionError("Exception is not expected: " + e);
}
// let NM's heartbeat to RM to confirm container launch
triggerSchedulingWithNMHeartBeat();
return containers;
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestAMRMClient method testAMRMClientOnAMRMTokenRollOver.
@Test(timeout = 60000)
public void testAMRMClientOnAMRMTokenRollOver() throws YarnException, IOException {
AMRMClient<ContainerRequest> amClient = null;
try {
AMRMTokenSecretManager amrmTokenSecretManager = yarnCluster.getResourceManager().getRMContext().getAMRMTokenSecretManager();
// start am rm client
amClient = AMRMClient.<ContainerRequest>createAMRMClient();
amClient.init(conf);
amClient.start();
Long startTime = System.currentTimeMillis();
amClient.registerApplicationMaster("Host", 10000, "");
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken_1 = getAMRMToken();
Assert.assertNotNull(amrmToken_1);
Assert.assertEquals(amrmToken_1.decodeIdentifier().getKeyId(), amrmTokenSecretManager.getMasterKey().getMasterKey().getKeyId());
// At mean time, the old AMRMToken should continue to work
while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
amClient.allocate(0.1f);
sleep(1000);
}
amClient.allocate(0.1f);
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken_2 = getAMRMToken();
Assert.assertNotNull(amrmToken_2);
Assert.assertEquals(amrmToken_2.decodeIdentifier().getKeyId(), amrmTokenSecretManager.getMasterKey().getMasterKey().getKeyId());
Assert.assertNotEquals(amrmToken_1, amrmToken_2);
// can do the allocate call with latest AMRMToken
AllocateResponse response = amClient.allocate(0.1f);
// Verify latest AMRMToken can be used to send allocation request.
UserGroupInformation testUser1 = UserGroupInformation.createRemoteUser("testUser1");
AMRMTokenIdentifierForTest newVersionTokenIdentifier = new AMRMTokenIdentifierForTest(amrmToken_2.decodeIdentifier(), "message");
Assert.assertEquals("Message is changed after set to newVersionTokenIdentifier", "message", newVersionTokenIdentifier.getMessage());
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newVersionToken = new org.apache.hadoop.security.token.Token<AMRMTokenIdentifier>(newVersionTokenIdentifier.getBytes(), amrmTokenSecretManager.retrievePassword(newVersionTokenIdentifier), newVersionTokenIdentifier.getKind(), new Text());
SecurityUtil.setTokenService(newVersionToken, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
testUser1.addToken(newVersionToken);
AllocateRequest request = Records.newRecord(AllocateRequest.class);
request.setResponseId(response.getResponseId());
testUser1.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
@Override
public ApplicationMasterProtocol run() {
return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress(), conf);
}
}).allocate(request);
// and can not use this rolled-over token to make a allocate all.
while (true) {
if (amrmToken_2.decodeIdentifier().getKeyId() != amrmTokenSecretManager.getCurrnetMasterKeyData().getMasterKey().getKeyId()) {
if (amrmTokenSecretManager.getNextMasterKeyData() == null) {
break;
} else if (amrmToken_2.decodeIdentifier().getKeyId() != amrmTokenSecretManager.getNextMasterKeyData().getMasterKey().getKeyId()) {
break;
}
}
amClient.allocate(0.1f);
sleep(1000);
}
try {
UserGroupInformation testUser2 = UserGroupInformation.createRemoteUser("testUser2");
SecurityUtil.setTokenService(amrmToken_2, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress());
testUser2.addToken(amrmToken_2);
testUser2.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
@Override
public ApplicationMasterProtocol run() {
return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, yarnCluster.getResourceManager().getApplicationMasterService().getBindAddress(), conf);
}
}).allocate(Records.newRecord(AllocateRequest.class));
Assert.fail("The old Token should not work");
} catch (Exception ex) {
Assert.assertTrue(ex instanceof InvalidToken);
Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + amrmToken_2.decodeIdentifier().getApplicationAttemptId()));
}
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
} finally {
if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
amClient.stop();
}
}
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestAMRMClient method testAMRMClientMatchStorage.
//(timeout=60000)
@Test
public void testAMRMClientMatchStorage() throws YarnException, IOException {
AMRMClientImpl<ContainerRequest> amClient = null;
try {
// start am rm client
amClient = (AMRMClientImpl<ContainerRequest>) AMRMClient.<ContainerRequest>createAMRMClient();
amClient.init(conf);
amClient.start();
amClient.registerApplicationMaster("Host", 10000, "");
Priority priority1 = Records.newRecord(Priority.class);
priority1.setPriority(2);
ContainerRequest storedContainer1 = new ContainerRequest(capability, nodes, racks, priority);
ContainerRequest storedContainer2 = new ContainerRequest(capability, nodes, racks, priority);
ContainerRequest storedContainer3 = new ContainerRequest(capability, null, null, priority1);
amClient.addContainerRequest(storedContainer1);
amClient.addContainerRequest(storedContainer2);
amClient.addContainerRequest(storedContainer3);
// test addition and storage
RemoteRequestsTable<ContainerRequest> remoteRequestsTable = amClient.getTable(0);
int containersRequestedAny = remoteRequestsTable.get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
assertEquals(2, containersRequestedAny);
containersRequestedAny = remoteRequestsTable.get(priority1, ResourceRequest.ANY, ExecutionType.GUARANTEED, capability).remoteRequest.getNumContainers();
assertEquals(1, containersRequestedAny);
List<? extends Collection<ContainerRequest>> matches = amClient.getMatchingRequests(priority, node, capability);
verifyMatches(matches, 2);
matches = amClient.getMatchingRequests(priority, rack, capability);
verifyMatches(matches, 2);
matches = amClient.getMatchingRequests(priority, ResourceRequest.ANY, capability);
verifyMatches(matches, 2);
matches = amClient.getMatchingRequests(priority1, rack, capability);
assertTrue(matches.isEmpty());
matches = amClient.getMatchingRequests(priority1, ResourceRequest.ANY, capability);
verifyMatches(matches, 1);
// test removal
amClient.removeContainerRequest(storedContainer3);
matches = amClient.getMatchingRequests(priority, node, capability);
verifyMatches(matches, 2);
amClient.removeContainerRequest(storedContainer2);
matches = amClient.getMatchingRequests(priority, node, capability);
verifyMatches(matches, 1);
matches = amClient.getMatchingRequests(priority, rack, capability);
verifyMatches(matches, 1);
// test matching of containers
ContainerRequest storedRequest = matches.get(0).iterator().next();
assertEquals(storedContainer1, storedRequest);
amClient.removeContainerRequest(storedContainer1);
matches = amClient.getMatchingRequests(priority, ResourceRequest.ANY, capability);
assertTrue(matches.isEmpty());
matches = amClient.getMatchingRequests(priority1, ResourceRequest.ANY, capability);
assertTrue(matches.isEmpty());
// 0 requests left. everything got cleaned up
assertTrue(amClient.getTable(0).isEmpty());
// go through an exemplary allocation, matching and release cycle
amClient.addContainerRequest(storedContainer1);
amClient.addContainerRequest(storedContainer3);
// RM should allocate container within 2 calls to allocate()
int allocatedContainerCount = 0;
int iterationsLeft = 3;
while (allocatedContainerCount < 2 && iterationsLeft-- > 0) {
Log.getLog().info("Allocated " + allocatedContainerCount + " containers" + " with " + iterationsLeft + " iterations left");
AllocateResponse allocResponse = amClient.allocate(0.1f);
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
assertEquals(nodeCount, amClient.getClusterNodeCount());
allocatedContainerCount += allocResponse.getAllocatedContainers().size();
for (Container container : allocResponse.getAllocatedContainers()) {
ContainerRequest expectedRequest = container.getPriority().equals(storedContainer1.getPriority()) ? storedContainer1 : storedContainer3;
matches = amClient.getMatchingRequests(container.getPriority(), ResourceRequest.ANY, container.getResource());
// test correct matched container is returned
verifyMatches(matches, 1);
ContainerRequest matchedRequest = matches.get(0).iterator().next();
assertEquals(matchedRequest, expectedRequest);
amClient.removeContainerRequest(matchedRequest);
// assign this container, use it and release it
amClient.releaseAssignedContainer(container.getId());
}
if (allocatedContainerCount < containersRequestedAny) {
// let NM heartbeat to RM and trigger allocations
triggerSchedulingWithNMHeartBeat();
}
}
assertEquals(2, allocatedContainerCount);
AllocateResponse allocResponse = amClient.allocate(0.1f);
assertEquals(0, amClient.release.size());
assertEquals(0, amClient.ask.size());
assertEquals(0, allocResponse.getAllocatedContainers().size());
// 0 requests left. everything got cleaned up
assertTrue(remoteRequestsTable.isEmpty());
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
} finally {
if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
amClient.stop();
}
}
}
Aggregations