use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.
the class AMRMClientImpl method allocate.
/**
 * Sends one allocate request to the ResourceManager through {@code rmClient}.
 *
 * The request is built from the currently pending asks, releases, container
 * update requests and blacklist changes. Those collections are cleared before
 * the RPC is made and are restored in the {@code finally} block when no
 * response was obtained. On a successful RPC the cached cluster node count,
 * last response id, available resources, NM tokens, AMRM token and the
 * pending release/change bookkeeping are updated from the response.
 *
 * If the RPC fails with {@link ApplicationMasterNotRegisteredException}, the
 * outgoing collections are refilled from the pending state, the application
 * master is re-registered and this method calls itself again.
 *
 * @param progressIndicator progress value placed in the request; must not be
 *          negative (checked with {@code Preconditions.checkArgument})
 * @return the response returned by {@code rmClient.allocate}, or by the
 *         recursive call made after re-registration
 * @throws YarnException declared; presumably propagated from the RM call or
 *           from re-registration
 * @throws IOException declared; presumably propagated from the RM call or
 *           from re-registration
 */
@Override
public AllocateResponse allocate(float progressIndicator) throws YarnException, IOException {
Preconditions.checkArgument(progressIndicator >= 0, "Progress indicator should not be negative");
AllocateResponse allocateResponse = null;
List<ResourceRequest> askList = null;
List<ContainerId> releaseList = null;
AllocateRequest allocateRequest = null;
// Local snapshots of what is sent in this call; used in the finally block to
// restore the shared collections if no response was obtained.
List<String> blacklistToAdd = new ArrayList<String>();
List<String> blacklistToRemove = new ArrayList<String>();
Map<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> oldChange = new HashMap<>();
try {
// Build the request and clear the outgoing collections while holding the lock.
synchronized (this) {
askList = cloneAsks();
// Save the current change for recovery
oldChange.putAll(change);
List<UpdateContainerRequest> updateList = createUpdateList();
releaseList = new ArrayList<ContainerId>(release);
// optimistically clear this collection assuming no RPC failure
ask.clear();
release.clear();
change.clear();
blacklistToAdd.addAll(blacklistAdditions);
blacklistToRemove.addAll(blacklistRemovals);
ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.newInstance(blacklistToAdd, blacklistToRemove);
allocateRequest = AllocateRequest.newBuilder().responseId(lastResponseId).progress(progressIndicator).askList(askList).resourceBlacklistRequest(blacklistRequest).releaseList(releaseList).updateRequests(updateList).build();
// clear blacklistAdditions and blacklistRemovals before
// unsynchronized part
blacklistAdditions.clear();
blacklistRemovals.clear();
}
// The RPC itself is made outside of the synchronized block.
try {
allocateResponse = rmClient.allocate(allocateRequest);
} catch (ApplicationMasterNotRegisteredException e) {
LOG.warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
// Refill the outgoing collections from the pending state so the retried
// request carries the pending releases, blacklisted nodes, remote requests
// and pending changes.
synchronized (this) {
release.addAll(this.pendingRelease);
blacklistAdditions.addAll(this.blacklistedNodes);
for (RemoteRequestsTable remoteRequestsTable : remoteRequests.values()) {
@SuppressWarnings("unchecked") Iterator<ResourceRequestInfo<T>> reqIter = remoteRequestsTable.iterator();
while (reqIter.hasNext()) {
addResourceRequestToAsk(reqIter.next().remoteRequest);
}
}
change.putAll(this.pendingChange);
}
// re register with RM
registerApplicationMaster();
// Retry through a recursive call. allocateResponse is assigned before the
// return, so the finally block below only restores state when the retry
// (or the re-registration) did not produce a response.
allocateResponse = allocate(progressIndicator);
return allocateResponse;
}
synchronized (this) {
// update these on successful RPC
clusterNodeCount = allocateResponse.getNumClusterNodes();
lastResponseId = allocateResponse.getResponseId();
clusterAvailableResources = allocateResponse.getAvailableResources();
if (!allocateResponse.getNMTokens().isEmpty()) {
populateNMTokens(allocateResponse.getNMTokens());
}
if (allocateResponse.getAMRMToken() != null) {
updateAMRMToken(allocateResponse.getAMRMToken());
}
if (!pendingRelease.isEmpty() && !allocateResponse.getCompletedContainersStatuses().isEmpty()) {
removePendingReleaseRequests(allocateResponse.getCompletedContainersStatuses());
}
if (!pendingChange.isEmpty()) {
List<ContainerStatus> completed = allocateResponse.getCompletedContainersStatuses();
List<UpdatedContainer> changed = new ArrayList<>();
changed.addAll(allocateResponse.getUpdatedContainers());
// remove all pending change requests that belong to the completed
// containers
for (ContainerStatus status : completed) {
ContainerId containerId = status.getContainerId();
pendingChange.remove(containerId);
}
// remove all pending change requests that have been satisfied
if (!changed.isEmpty()) {
removePendingChangeRequests(changed);
}
}
}
} finally {
// TODO how to differentiate remote yarn exception vs error in rpc
// A null response means no allocate call completed successfully.
if (allocateResponse == null) {
// preserve ask and release for next call to allocate()
synchronized (this) {
release.addAll(releaseList);
// Only re-add the asks sent in this call that are not already present in
// ask; entries that are present now are left untouched.
for (ResourceRequest oldAsk : askList) {
if (!ask.contains(oldAsk)) {
ask.add(oldAsk);
}
}
// Only re-insert entries from the saved oldChange map
// that do not exist in the current change map:
for (Map.Entry<ContainerId, SimpleEntry<Container, UpdateContainerRequest>> entry : oldChange.entrySet()) {
ContainerId oldContainerId = entry.getKey();
Container oldContainer = entry.getValue().getKey();
UpdateContainerRequest oldupdate = entry.getValue().getValue();
if (change.get(oldContainerId) == null) {
change.put(oldContainerId, new SimpleEntry<>(oldContainer, oldupdate));
}
}
// Put back the blacklist changes that were cleared before the RPC.
blacklistAdditions.addAll(blacklistToAdd);
blacklistRemovals.addAll(blacklistToRemove);
}
}
}
return allocateResponse;
}
use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.
the class NMClientImpl method getContainerStatus.
/**
 * Fetches the status of a single container from the node it runs on.
 *
 * A proxy for the node is obtained from {@code cmProxy}, a status request
 * holding only {@code containerId} is sent through it, and the first status
 * in the response is returned. If the response lists the container among its
 * failed requests, the recorded failure is deserialized and handed to
 * {@code parseAndThrowException}. The proxy, when one was obtained, is always
 * passed back to {@code cmProxy.mayBeCloseProxy}.
 *
 * @param containerId id of the container whose status is requested
 * @param nodeId node used to look up the proxy
 * @return the first container status contained in the response
 * @throws YarnException declared; presumably raised for a failed request
 * @throws IOException declared; presumably raised on communication failure
 */
@Override
public ContainerStatus getContainerStatus(ContainerId containerId, NodeId nodeId) throws YarnException, IOException {
  // The request carries exactly one container id.
  List<ContainerId> requestedIds = new ArrayList<ContainerId>();
  requestedIds.add(containerId);
  GetContainerStatusesRequest statusRequest = GetContainerStatusesRequest.newInstance(requestedIds);

  ContainerManagementProtocolProxyData nodeProxy = null;
  try {
    nodeProxy = cmProxy.getProxy(nodeId.toString(), containerId);
    GetContainerStatusesResponse statusResponse = nodeProxy.getContainerManagementProtocol().getContainerStatuses(statusRequest);

    boolean requestFailed = statusResponse.getFailedRequests() != null
        && statusResponse.getFailedRequests().containsKey(containerId);
    if (requestFailed) {
      Throwable failure = statusResponse.getFailedRequests().get(containerId).deSerialize();
      parseAndThrowException(failure);
    }
    return statusResponse.getContainerStatuses().get(0);
  } finally {
    // Hand the proxy back only when it was actually obtained.
    if (nodeProxy != null) {
      cmProxy.mayBeCloseProxy(nodeProxy);
    }
  }
}
use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.
the class TestContainerManager method testChangeContainerResource.
/**
 * Verifies that a running container's resource can be increased and then
 * decreased.
 *
 * The test launches a container running a long-sleeping script, waits for it
 * to reach RUNNING, sends an increase request to 4096/2 and checks the
 * reported capability immediately. It then posts a decrease event to 2048/2
 * and polls the container status (up to 5 retries, 200 ms apart) until the
 * reported capability matches.
 */
@Test
public void testChangeContainerResource() throws Exception {
  containerManager.start();
  File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
  // Construct the Container-id
  ContainerId cId = createContainerId(0);
  // try-with-resources: the writer is closed (and flushed) even if writing
  // the launch script fails part-way.
  try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
    if (Shell.WINDOWS) {
      fileWriter.println("@ping -n 100 127.0.0.1 >nul");
    } else {
      fileWriter.write("\numask 0");
      fileWriter.write("\nexec sleep 100");
    }
  }

  // Describe the script as a local resource of the container.
  ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
  URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);
  List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
  containerLaunchContext.setCommands(commands);

  // Start the container.
  StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);
  // Make sure the container reaches RUNNING state
  BaseContainerManagerTest.waitForNMContainerState(containerManager, cId, org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.RUNNING);

  // Construct container resource increase request,
  List<Token> increaseTokens = new ArrayList<>();
  // Add increase request.
  Resource targetResource = Resource.newInstance(4096, 2);
  Token containerToken = createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, targetResource, context.getContainerTokenSecretManager(), null);
  increaseTokens.add(containerToken);
  IncreaseContainersResourceRequest increaseRequest = IncreaseContainersResourceRequest.newInstance(increaseTokens);
  IncreaseContainersResourceResponse increaseResponse = containerManager.increaseContainersResource(increaseRequest);
  Assert.assertEquals(1, increaseResponse.getSuccessfullyIncreasedContainers().size());
  Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty());

  // Check status
  List<ContainerId> containerIds = new ArrayList<>();
  containerIds.add(cId);
  GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
  ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
  // Check status immediately as resource increase is blocking
  assertEquals(targetResource, containerStatus.getCapability());

  // Simulate a decrease request
  List<org.apache.hadoop.yarn.api.records.Container> containersToDecrease = new ArrayList<>();
  targetResource = Resource.newInstance(2048, 2);
  org.apache.hadoop.yarn.api.records.Container decreasedContainer = org.apache.hadoop.yarn.api.records.Container.newInstance(cId, null, null, targetResource, null, null);
  containersToDecrease.add(decreasedContainer);
  containerManager.handle(new CMgrDecreaseContainersResourceEvent(containersToDecrease));

  // Check status with retry: the decrease is delivered as an event, so the
  // new capability may not be visible on the first status query.
  containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
  int retry = 0;
  while (!targetResource.equals(containerStatus.getCapability()) && (retry++ < 5)) {
    Thread.sleep(200);
    containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
  }
  assertEquals(targetResource, containerStatus.getCapability());
}
use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.
the class TestContainerManager method testContainerLaunchAndStop.
/**
 * Launches a container whose script writes a greeting and an identifying line
 * into a start file, checks that file and that the process is alive, then
 * stops the container and verifies it completed with
 * {@code ContainerExitStatus.KILLED_BY_APPMASTER} and is no longer alive.
 *
 * NOTE(review): the {@code @Test} annotation is commented out, so this
 * method is presumably not run by the test framework; it is left that way.
 */
//@Test
public void testContainerLaunchAndStop() throws IOException, InterruptedException, YarnException {
  containerManager.start();
  File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
  File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile();
  // ////// Construct the Container-id
  ContainerId cId = createContainerId(0);
  // try-with-resources: the writer is closed (and flushed) even if writing
  // the launch script fails part-way.
  try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
    if (Shell.WINDOWS) {
      fileWriter.println("@echo Hello World!> " + processStartFile);
      fileWriter.println("@echo " + cId + ">> " + processStartFile);
      fileWriter.println("@ping -n 100 127.0.0.1 >nul");
    } else {
      // So that start file is readable by the test
      fileWriter.write("\numask 0");
      fileWriter.write("\necho Hello World! > " + processStartFile);
      fileWriter.write("\necho $$ >> " + processStartFile);
      fileWriter.write("\nexec sleep 100");
    }
  }

  // Describe the script as a local resource of the container.
  ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
  URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);
  List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
  containerLaunchContext.setCommands(commands);

  // Start the container.
  StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);

  // Wait up to roughly 20 seconds for the script to create the start file.
  int timeoutSecs = 0;
  while (!processStartFile.exists() && timeoutSecs++ < 20) {
    Thread.sleep(1000);
    LOG.info("Waiting for process start-file to be created");
  }
  Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());

  // Now verify the contents of the file. The reader is closed by
  // try-with-resources; previously it was never closed.
  String pid;
  try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
    Assert.assertEquals("Hello World!", reader.readLine());
    // Get the pid of the process
    String pidLine = reader.readLine();
    // Fail with a readable message rather than an NPE if the second line
    // has not been written.
    Assert.assertNotNull("ProcessStartFile has no pid line!", pidLine);
    pid = pidLine.trim();
    // No more lines
    Assert.assertEquals(null, reader.readLine());
  }

  // Now test the stop functionality.
  // Assert that the process is alive
  Assert.assertTrue("Process is not alive!", DefaultContainerExecutor.containerIsAlive(pid));
  // Once more
  Assert.assertTrue("Process is not alive!", DefaultContainerExecutor.containerIsAlive(pid));

  List<ContainerId> containerIds = new ArrayList<>();
  containerIds.add(cId);
  StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
  containerManager.stopContainers(stopRequest);
  BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);

  GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
  ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
  int expectedExitCode = ContainerExitStatus.KILLED_BY_APPMASTER;
  Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus());
  // Assert that the process is not alive anymore
  Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(pid));
}
use of org.apache.hadoop.yarn.api.records.ContainerStatus in project hadoop by apache.
the class MockResourceManagerFacade method allocate.
/**
 * Mock implementation of the allocate heartbeat.
 *
 * For every ask in the request, one container per requested container count
 * is created with a freshly generated id and recorded for the calling
 * application. For every released container id, the id is validated against
 * the ids recorded for the application, removed from them, and the stored
 * container is returned under a new fake id so the caller can see that the
 * release was received. A request carrying both a non-empty ask list and a
 * non-empty release list fails the test.
 *
 * @param request the allocate request to serve
 * @return a response containing the created (and re-labelled released)
 *         containers, {@code AMCommand.AM_RESYNC} and a new AMRM token
 * @throws YarnException declared by the interface; not thrown directly here
 * @throws IOException declared by the interface; not thrown directly here
 */
@SuppressWarnings("deprecation")
@Override
public AllocateResponse allocate(AllocateRequest request) throws YarnException, IOException {
  List<ResourceRequest> asks = request.getAskList();
  List<ContainerId> releases = request.getReleaseList();
  boolean hasAsks = asks != null && !asks.isEmpty();
  boolean hasReleases = releases != null && !releases.isEmpty();
  if (hasAsks && hasReleases) {
    Assert.fail("The mock RM implementation does not support receiving " + "askList and releaseList in the same heartbeat");
  }

  String amrmToken = getAppIdentifier();
  ArrayList<Container> containerList = new ArrayList<Container>();

  if (asks != null) {
    for (ResourceRequest ask : asks) {
      for (int n = 0; n < ask.getNumContainers(); n++) {
        ContainerId newId = ContainerId.newInstance(getApplicationAttemptId(1), containerIndex.incrementAndGet());
        Container allocated = Records.newRecord(Container.class);
        allocated.setId(newId);
        allocated.setPriority(ask.getPriority());
        // The node is never used to run containers in the test cases, so a
        // dummy host name is fine when the ask names no resource.
        String host = Strings.isNullOrEmpty(ask.getResourceName()) ? "dummy" : ask.getResourceName();
        allocated.setNodeId(NodeId.newInstance(host, 1000));
        allocated.setResource(ask.getCapability());
        containerList.add(allocated);
        synchronized (applicationContainerIdMap) {
          // Remember which containers were handed to this application; the
          // release path below looks them up again.
          Assert.assertTrue("The application id is Not registered before allocate(): " + amrmToken, applicationContainerIdMap.containsKey(amrmToken));
          applicationContainerIdMap.get(amrmToken).add(newId);
          this.allocatedContainerMap.put(newId, allocated);
        }
      }
    }
  }

  if (hasReleases) {
    Log.getLog().info("Releasing containers: " + releases.size());
    synchronized (applicationContainerIdMap) {
      Assert.assertTrue("The application id is not registered before allocate(): " + amrmToken, applicationContainerIdMap.containsKey(amrmToken));
      List<ContainerId> knownIds = applicationContainerIdMap.get(amrmToken);
      for (ContainerId releasedId : releases) {
        boolean known = knownIds.contains(releasedId);
        Assert.assertTrue("ContainerId " + releasedId + " being released is not valid for application: " + conf.get("AMRMTOKEN"), known);
        knownIds.remove(releasedId);
        // Hand the released container back to the AM under a new fake id.
        // The test does not care about the ids; they are faked because
        // otherwise the LRM will throw a duplicate identifier exception.
        // This is done ONLY for testing, so the test code gets confirmation
        // that the sub-cluster resource managers received the release.
        ContainerId fakeId = ContainerId.newInstance(getApplicationAttemptId(1), containerIndex.incrementAndGet());
        Container returned = allocatedContainerMap.get(releasedId);
        returned.setId(fakeId);
        containerList.add(returned);
      }
    }
  }

  Log.getLog().info("Allocating containers: " + containerList.size() + " for application attempt: " + conf.get("AMRMTOKEN"));
  // Always issue a new AMRMToken as if RM rolled master key
  Token newAMRMToken = Token.newInstance(new byte[0], "", new byte[0], "");
  return AllocateResponse.newInstance(0, new ArrayList<ContainerStatus>(), containerList, new ArrayList<NodeReport>(), null, AMCommand.AM_RESYNC, 1, null, new ArrayList<NMToken>(), newAMRMToken, new ArrayList<UpdatedContainer>());
}
Aggregations