use of org.apache.hadoop.yarn.api.records.UpdateContainerRequest in project hadoop by apache.
the class AMRMClientImpl method createUpdateList.
/**
 * Builds the container update requests to send on the next allocate call.
 *
 * <p>One request is produced per entry of the {@code change} map. The
 * container version is read from the stored {@code Container}, the
 * container id from the map key, and the update type, target capability and
 * target execution type from the stored {@code UpdateContainerRequest}.
 *
 * @return a new list holding one freshly created request per {@code change}
 *         entry; empty when {@code change} is empty
 */
private List<UpdateContainerRequest> createUpdateList() {
  List<UpdateContainerRequest> requests = new ArrayList<>();
  for (Map.Entry<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> pending : change.entrySet()) {
    SimpleEntry<Container, UpdateContainerRequest> pair = pending.getValue();
    UpdateContainerRequest template = pair.getValue();
    // Read the requested targets from the stored request.
    Resource capability = template.getCapability();
    ExecutionType execType = template.getExecutionType();
    ContainerUpdateType kind = template.getContainerUpdateType();
    // The version comes from the stored container, not from the request.
    int containerVersion = pair.getKey().getVersion();
    requests.add(UpdateContainerRequest.newInstance(containerVersion, pending.getKey(), kind, capability, execType));
  }
  return requests;
}
use of org.apache.hadoop.yarn.api.records.UpdateContainerRequest in project hadoop by apache.
the class AMRMClientImpl method allocate.
/**
 * Sends the currently queued asks, releases, container update requests and
 * blacklist changes to the ResourceManager and processes the response.
 *
 * <p>Flow, as implemented below:
 * <ol>
 * <li>Under the instance lock, snapshot {@code ask}, {@code release},
 * {@code change} and the blacklist additions/removals, build the
 * {@code AllocateRequest}, and clear those collections optimistically.</li>
 * <li>Outside the lock, call {@code rmClient.allocate}.</li>
 * <li>On {@code ApplicationMasterNotRegisteredException}, re-queue
 * {@code pendingRelease}, {@code blacklistedNodes}, every remote request and
 * {@code pendingChange}, re-register, and retry by calling this method
 * again.</li>
 * <li>On success, record cluster node count, response id, available
 * resources and tokens from the response, and prune {@code pendingRelease}
 * and {@code pendingChange}.</li>
 * <li>If no response was obtained, the {@code finally} block puts the
 * snapshot back so the next call sends it again.</li>
 * </ol>
 *
 * @param progressIndicator progress value forwarded to the RM; must be
 *        {@code >= 0} or the precondition check rejects it
 * @return the response returned by {@code rmClient.allocate} (or by the
 *         retried call after a re-registration)
 * @throws YarnException declared; presumably propagated from the RM call
 *         or the re-registration -- confirm against rmClient
 * @throws IOException declared; presumably propagated from the RM call
 *         or the re-registration -- confirm against rmClient
 */
@Override
public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
Preconditions.checkArgument(progressIndicator >= 0, "Progress indicator should not be negative");
AllocateResponse allocateResponse = null;
List<ResourceRequest> askList = null;
List<ContainerId> releaseList = null;
AllocateRequest allocateRequest = null;
List<String> blacklistToAdd = new ArrayList<String>();
List<String> blacklistToRemove = new ArrayList<String>();
Map<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> oldChange = new HashMap<>();
try {
synchronized (this) {
askList = cloneAsks();
// Save the current change for recovery
oldChange.putAll(change);
List<UpdateContainerRequest> updateList = createUpdateList();
releaseList = new ArrayList<ContainerId>(release);
// optimistically clear this collection assuming no RPC failure
ask.clear();
release.clear();
change.clear();
blacklistToAdd.addAll(blacklistAdditions);
blacklistToRemove.addAll(blacklistRemovals);
ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove);
allocateRequest = AllocateRequest.newBuilder().responseId(lastResponseId).progress(progressIndicator).askList(askList).resourceBlacklistRequest(blacklistRequest).releaseList(releaseList).updateRequests(updateList).build();
// clear blacklistAdditions and blacklistRemovals before
// unsynchronized part
blacklistAdditions.clear();
blacklistRemovals.clear();
}
// The RPC itself is issued without holding the instance lock.
try {
allocateResponse = rmClient.allocate(allocateRequest);
} catch (ApplicationMasterNotRegisteredException e) {
LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
synchronized (this) {
// Re-queue everything tracked as outstanding so the retried call below
// sends it again: pending releases, blacklisted nodes, every remote
// request, and pending container changes.
release.addAll(this.pendingRelease);
blacklistAdditions.addAll(this.blacklistedNodes);
for (RemoteRequestsTable remoteRequestsTable : remoteRequests.values()) {
@SuppressWarnings("unchecked") Iterator<ResourceRequestInfo<T>> reqIter = remoteRequestsTable.iterator();
while (reqIter.hasNext()) {
addResourceRequestToAsk(reqIter.next().remoteRequest);
}
}
change.putAll(this.pendingChange);
}
// re register with RM
registerApplicationMaster();
// Retry with the re-queued state. Returning here means the
// success-path bookkeeping below is not repeated by this outer call, and
// because allocateResponse is non-null when the retry returns normally,
// the finally block does not restore the snapshot.
allocateResponse = allocate(progressIndicator);
return allocateResponse;
}
synchronized (this) {
// update these on successful RPC
clusterNodeCount = allocateResponse.getNumClusterNodes();
lastResponseId = allocateResponse.getResponseId();
clusterAvailableResources = allocateResponse.getAvailableResources();
if (!allocateResponse.getNMTokens().isEmpty()) {
populateNMTokens(allocateResponse.getNMTokens());
}
if (allocateResponse.getAMRMToken() != null) {
updateAMRMToken(allocateResponse.getAMRMToken());
}
if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
}
if (!pendingChange.isEmpty()) {
List<ContainerStatus> completed = allocateResponse.getCompletedContainersStatuses();
List<UpdatedContainer> changed = new ArrayList<>();
changed.addAll(allocateResponse.getUpdatedContainers());
// remove all pending change requests that belong to the completed
// containers
for (ContainerStatus status : completed) {
ContainerId containerId = status.getContainerId();
pendingChange.remove(containerId);
}
// remove all pending change requests that have been satisfied
if (!changed.isEmpty()) {
removePendingChangeRequests(changed);
}
}
}
} finally {
// TODO how to differentiate remote yarn exception vs error in rpc
// A null response here means no response was obtained (for example the
// RPC or the retry threw), so the optimistic clears above are undone.
if (allocateResponse == null) {
// preserve ask and release for next call to allocate()
synchronized (this) {
release.addAll(releaseList);
// Only re-add an old ask when ask does not already contain an equal
// request, so requests added while the RPC was in flight are kept as is.
// NOTE(review): presumably this relies on there being no concurrent
// calls to allocate() that could change ask in the
// synchronized block at the beginning of this method.
for (ResourceRequest oldAsk : askList) {
if (!ask.contains(oldAsk)) {
ask.add(oldAsk);
}
}
// Change requests present in the current change map are left untouched.
// Only insert entries from the cached oldChange map
// that do not exist in the current change map:
for (Map.Entry<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> entry : oldChange.entrySet()) {
ContainerId oldContainerId = entry.getKey();
Container oldContainer = entry.getValue().getKey();
UpdateContainerRequest oldupdate = entry.getValue().getValue();
if (change.get(oldContainerId) == null) {
change.put(oldContainerId, new SimpleEntry<>(oldContainer, oldupdate));
}
}
// Put back the blacklist changes that were captured for this request.
blacklistAdditions.addAll(blacklistToAdd);
blacklistRemovals.addAll(blacklistToRemove);
}
}
}
return allocateResponse;
}
use of org.apache.hadoop.yarn.api.records.UpdateContainerRequest in project hadoop by apache.
the class AbstractYarnScheduler method createSchedContainerChangeRequests.
/**
 * Converts each incoming update request into a scheduler-level change
 * request via {@code createSchedContainerChangeRequest}.
 *
 * <p>A request whose conversion throws {@code YarnException} is logged at
 * WARN level and left out of the result; the remaining requests are still
 * processed.
 *
 * @param changeRequests the update requests to convert
 * @param increase flag passed through unchanged to
 *        {@code createSchedContainerChangeRequest}
 * @return a new list with the successfully converted requests, in input order
 */
protected List<SchedContainerChangeRequest> createSchedContainerChangeRequests(List<UpdateContainerRequest> changeRequests, boolean increase) {
  List<SchedContainerChangeRequest> converted = new ArrayList<>();
  for (UpdateContainerRequest updateRequest : changeRequests) {
    try {
      converted.add(createSchedContainerChangeRequest(updateRequest, increase));
    } catch (YarnException e) {
      // Skip only the failing request and keep going with the rest.
      LOG.warn("Error happens when checking increase request, Ignoring.." + " exception=", e);
    }
  }
  return converted;
}
use of org.apache.hadoop.yarn.api.records.UpdateContainerRequest in project hadoop by apache.
the class AbstractYarnScheduler method handleDecreaseRequests.
/**
 * Processes demotion / resource-decrease requests for an application attempt.
 *
 * <p>For every request:
 * <ul>
 * <li>if the scheduler has no {@code RMContainer} for the container id, a
 * warning is logged and the request is skipped;</li>
 * <li>if the attempt's update context refuses to add it to the outstanding
 * decreases, an {@code UpdateContainerError} carrying
 * {@code RMServerUtils.UPDATE_OUTSTANDING_ERROR} is recorded on the
 * attempt;</li>
 * <li>otherwise a demoted container (for {@code DEMOTE_EXECUTION_TYPE}) or a
 * decreased container (any other update type) is created and registered on
 * the attempt.</li>
 * </ul>
 *
 * @param appAttempt the application attempt the requests belong to
 * @param demotionRequests the demotion / decrease requests to process
 */
private void handleDecreaseRequests(SchedulerApplicationAttempt appAttempt, List<UpdateContainerRequest> demotionRequests) {
  OpportunisticContainerContext oppCntxt = appAttempt.getOpportunisticContainerContext();
  for (UpdateContainerRequest request : demotionRequests) {
    RMContainer existing = rmContext.getScheduler().getRMContainer(request.getContainerId());
    if (existing == null) {
      LOG.warn("Cannot demote/decrease non-existent (or completed) " + "Container [" + request.getContainerId() + "]");
      continue;
    }

    SchedulerNode node = rmContext.getScheduler().getSchedulerNode(existing.getContainer().getNodeId());
    boolean accepted = appAttempt.getUpdateContext().checkAndAddToOutstandingDecreases(request, node, existing.getContainer());
    if (!accepted) {
      appAttempt.addToUpdateContainerErrors(UpdateContainerError.newInstance(RMServerUtils.UPDATE_OUTSTANDING_ERROR, request));
      continue;
    }

    if (ContainerUpdateType.DEMOTE_EXECUTION_TYPE == request.getContainerUpdateType()) {
      RMContainer demoted = createDemotedRMContainer(appAttempt, oppCntxt, existing);
      appAttempt.addToNewlyDemotedContainers(request.getContainerId(), demoted);
    } else {
      RMContainer decreased = createDecreasedRMContainer(appAttempt, request, existing);
      appAttempt.addToNewlyDecreasedContainers(request.getContainerId(), decreased);
    }
  }
}
use of org.apache.hadoop.yarn.api.records.UpdateContainerRequest in project hadoop by apache.
the class TestIncreaseAllocationExpirer method testDecreaseAfterIncreaseWithAllocationExpiration.
@Test
public void testDecreaseAfterIncreaseWithAllocationExpiration() throws Exception {
/**
 * 1. Allocate three containers: containerId2, containerId3, containerId4
 * 2. Increase resource of containerId2: 3G -> 6G
 * 3. Increase resource of containerId3: 3G -> 6G
 * 4. Increase resource of containerId4: 3G -> 6G
 * 5. Do NOT use the increase tokens for containerId2 and containerId3
 * 6. Decrease containerId2: 6G -> 2G (i.e., below last confirmed resource)
 * 7. Decrease containerId3: 6G -> 4G (i.e., above last confirmed resource)
 * 8. Decrease containerId4: 6G -> 4G (i.e., above last confirmed resource)
 * 9. Use token for containerId4 to increase containerId4 on NM to 6G
 * 10. Verify containerId2 eventually uses 2G (removed from expirer)
 * 11. Verify containerId3 eventually uses 3G (increase token expires)
 * 12. Verify containerId4 eventually uses 4G (removed from expirer)
 * 13. Verify NM eventually uses 3G for containerId3, 4G for containerId4
 */
// Set the allocation expiration to 5 seconds
conf.setLong(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, 5000);
MockRM rm1 = new MockRM(conf);
rm1.start();
// Submit an application
MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 20 * GB);
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.RUNNING);
// AM requests three new 3GB containers
am1.allocate("127.0.0.1", 3 * GB, 3, new ArrayList<ContainerId>());
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
rm1.waitForState(nm1, containerId3, RMContainerState.ALLOCATED);
ContainerId containerId4 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
rm1.waitForState(nm1, containerId4, RMContainerState.ALLOCATED);
// AM acquires tokens to start container allocation expirer
List<Container> containers = am1.allocate(null, null).getAllocatedContainers();
Assert.assertEquals(3, containers.size());
Assert.assertNotNull(containers.get(0).getContainerToken());
Assert.assertNotNull(containers.get(1).getContainerToken());
Assert.assertNotNull(containers.get(2).getContainerToken());
// Report container status
nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 2, ContainerState.RUNNING);
nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 3, ContainerState.RUNNING);
nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 4, ContainerState.RUNNING);
// Wait until container status becomes RUNNING
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
rm1.waitForState(nm1, containerId4, RMContainerState.RUNNING);
// am1 asks to increase containerId2, containerId3 and containerId4
// from 3GB to 6GB
List<UpdateContainerRequest> increaseRequests = new ArrayList<>();
increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId3, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId4, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
am1.sendContainerResizingRequest(increaseRequests);
nm1.nodeHeartbeat(true);
Thread.sleep(1000);
// Start container increase allocation expirer
am1.allocate(null, null);
// Decrease containers (requests carry container version 1, whereas the
// increase requests above carried version 0)
List<UpdateContainerRequest> decreaseRequests = new ArrayList<>();
decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId2, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(2 * GB), null));
decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId3, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(4 * GB), null));
decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId4, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(4 * GB), null));
AllocateResponse response = am1.sendContainerResizingRequest(decreaseRequests);
// Verify containers are decreased in scheduler
Assert.assertEquals(3, response.getUpdatedContainers().size());
// Use the token for containerId4 on NM (6G). This should set the last
// confirmed resource to 4G, and cancel the allocation expirer
nm1.containerIncreaseStatus(getContainer(rm1, containerId4, Resources.createResource(6 * GB)));
// Wait for containerId3 token to expire (10 seconds, longer than the
// 5 second expiry interval configured above)
Thread.sleep(10000);
am1.allocate(null, null);
Assert.assertEquals(2 * GB, rm1.getResourceScheduler().getRMContainer(containerId2).getAllocatedResource().getMemorySize());
Assert.assertEquals(3 * GB, rm1.getResourceScheduler().getRMContainer(containerId3).getAllocatedResource().getMemorySize());
Assert.assertEquals(4 * GB, rm1.getResourceScheduler().getRMContainer(containerId4).getAllocatedResource().getMemorySize());
// Verify NM receives 2 decrease messages
List<Container> containersToDecrease = nm1.nodeHeartbeat(true).getContainersToDecrease();
Assert.assertEquals(2, containersToDecrease.size());
// Sort the list to make sure containerId3 is the first
Collections.sort(containersToDecrease);
Assert.assertEquals(3 * GB, containersToDecrease.get(0).getResource().getMemorySize());
Assert.assertEquals(4 * GB, containersToDecrease.get(1).getResource().getMemorySize());
rm1.stop();
}
Aggregations