use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class TestNodeStatusUpdater method testCleanedupApplicationContainerCleanup.
/**
 * Verifies that a completed container keeps being reported by the node status
 * updater while its application is RUNNING, after the application moves to
 * FINISHING_CONTAINERS_WAIT, and after the application has been removed from
 * the NM context.
 */
@Test
public void testCleanedupApplicationContainerCleanup() throws IOException {
  NodeManager nm = new NodeManager();
  YarnConfiguration conf = new YarnConfiguration();
  conf.set(
      NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
      "1000000");
  nm.init(conf);
  NodeStatusUpdaterImpl updater =
      (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();

  // Identifiers for container 1 of attempt 0 of application 0.
  ApplicationId applicationId = ApplicationId.newInstance(0, 0);
  ApplicationAttemptId attemptId =
      ApplicationAttemptId.newInstance(applicationId, 0);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 1);

  Token token = BuilderUtils.newContainerToken(
      containerId, 0, "anyHost", 1234, "anyUser",
      BuilderUtils.newResource(1024, 1), 0, 123, "password".getBytes(), 0);

  // A container whose current state is pinned to COMPLETE.
  Container completedContainer = new ContainerImpl(
      conf, null, null, null, null,
      BuilderUtils.newContainerTokenIdentifier(token), nm.getNMContext()) {
    @Override
    public ContainerState getCurrentState() {
      return ContainerState.COMPLETE;
    }
  };

  // Stage 1: the owning application is RUNNING.
  Application app = mock(Application.class);
  when(app.getApplicationState()).thenReturn(ApplicationState.RUNNING);
  nm.getNMContext().getApplications().putIfAbsent(applicationId, app);
  nm.getNMContext().getContainers().put(containerId, completedContainer);
  Assert.assertEquals(1, updater.getContainerStatuses().size());

  // Stage 2: the application is waiting for its containers to finish.
  when(app.getApplicationState())
      .thenReturn(ApplicationState.FINISHING_CONTAINERS_WAIT);
  // The completed container will be saved in case of lost heartbeat.
  Assert.assertEquals(1, updater.getContainerStatuses().size());
  Assert.assertEquals(1, updater.getContainerStatuses().size());

  // Stage 3: the container is registered again and the application removed.
  nm.getNMContext().getContainers().put(containerId, completedContainer);
  nm.getNMContext().getApplications().remove(applicationId);
  // The completed container will be saved in case of lost heartbeat.
  Assert.assertEquals(1, updater.getContainerStatuses().size());
  Assert.assertEquals(1, updater.getContainerStatuses().size());
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class TestNodeStatusUpdater method testCompletedContainersIsRecentlyStopped.
/**
 * Verifies that a COMPLETE container belonging to a FINISHED application is
 * reported once by the node status updater and is afterwards considered
 * recently stopped.
 */
@Test(timeout = 10000)
public void testCompletedContainersIsRecentlyStopped() throws Exception {
  NodeManager nm = new NodeManager();
  nm.init(conf);
  NodeStatusUpdaterImpl updater =
      (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();

  // Identifiers for container 1 of attempt 0 of application 0.
  ApplicationId applicationId = ApplicationId.newInstance(0, 0);
  ApplicationAttemptId attemptId =
      ApplicationAttemptId.newInstance(applicationId, 0);
  ContainerId cId = ContainerId.newContainerId(attemptId, 1);

  // The owning application always reports FINISHED.
  Application finishedApp = mock(Application.class);
  when(finishedApp.getApplicationState()).thenReturn(ApplicationState.FINISHED);

  Token token = BuilderUtils.newContainerToken(
      cId, 0, "host", 1234, "user",
      BuilderUtils.newResource(1024, 1), 0, 123, "password".getBytes(), 0);

  // A container whose current state is pinned to COMPLETE.
  Container container = new ContainerImpl(
      conf, null, null, null, null,
      BuilderUtils.newContainerTokenIdentifier(token), nm.getNMContext()) {
    @Override
    public ContainerState getCurrentState() {
      return ContainerState.COMPLETE;
    }
  };

  nm.getNMContext().getApplications().putIfAbsent(applicationId, finishedApp);
  nm.getNMContext().getContainers().put(cId, container);

  Assert.assertEquals(1, updater.getContainerStatuses().size());
  Assert.assertTrue(updater.isContainerRecentlyStopped(cId));
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class TestNodeManagerReboot method testClearLocalDirWhenNodeReboot.
/**
 * Verifies that the NodeManager's local directories are cleared when the node
 * is restarted.
 *
 * <p>The test starts a NodeManager, seeds the file cache, launches one
 * container with a single local resource and no commands, waits for the
 * container to reach {@code DONE}, and then restarts the NodeManager twice,
 * checking the local directories and the deletion-service interactions.
 *
 * @throws IOException if file-system or RPC setup fails
 * @throws YarnException if the container cannot be started
 * @throws InterruptedException if the test thread is interrupted while
 *     waiting for the container to finish
 */
@Test(timeout = 2000000)
public void testClearLocalDirWhenNodeReboot() throws IOException, YarnException, InterruptedException {
  nm = new MyNodeManager();
  nm.start();
  final ContainerManagementProtocol containerManager = nm.getContainerManager();

  // create files under fileCache
  createFiles(nmLocalDir.getAbsolutePath(), ContainerLocalizer.FILECACHE, 100);
  localResourceDir.mkdirs();

  ContainerLaunchContext containerLaunchContext =
      Records.newRecord(ContainerLaunchContext.class);
  // Construct the Container-id
  ContainerId cId = createContainerId();

  // Register the local resource directory as the container's only resource.
  URL localResourceUri = URL.fromPath(
      localFS.makeQualified(new Path(localResourceDir.getAbsolutePath())));
  LocalResource localResource = LocalResource.newInstance(
      localResourceUri, LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION, -1, localResourceDir.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, localResource);
  containerLaunchContext.setLocalResources(localResources);

  // The container is launched with an empty command list.
  List<String> commands = new ArrayList<String>();
  containerLaunchContext.setCommands(commands);

  NodeId nodeId = nm.getNMContext().getNodeId();
  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      TestContainerManager.createContainerToken(
          cId, 0, nodeId, destinationFile,
          nm.getNMContext().getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
  list.add(scRequest);
  final StartContainersRequest allRequests = StartContainersRequest.newInstance(list);

  // Issue the start request as a remote user carrying an NM token identifier.
  final UserGroupInformation currentUser =
      UserGroupInformation.createRemoteUser(cId.getApplicationAttemptId().toString());
  NMTokenIdentifier nmIdentifier =
      new NMTokenIdentifier(cId.getApplicationAttemptId(), nodeId, user, 123);
  currentUser.addTokenIdentifier(nmIdentifier);
  currentUser.doAs(new PrivilegedExceptionAction<Void>() {
    @Override
    public Void run() throws YarnException, IOException {
      containerManager.startContainers(allRequests);
      return null;
    }
  });

  Container container = nm.getNMContext().getContainers().get(cId);
  // Fail with a readable message instead of a NullPointerException below.
  Assert.assertNotNull("Container " + cId + " was not found in the NM context", container);

  // Poll for the container to reach DONE. An interrupt is propagated (the
  // method already declares InterruptedException) rather than swallowed.
  final int MAX_TRIES = 20;
  int numTries = 0;
  while (!container.getContainerState().equals(ContainerState.DONE) && numTries <= MAX_TRIES) {
    Thread.sleep(500);
    numTries++;
  }
  Assert.assertEquals(ContainerState.DONE, container.getContainerState());

  Assert.assertTrue("The container should create a subDir named currentUser: " + user + " under localDir/usercache", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ContainerLocalizer.USERCACHE) > 0);
  Assert.assertTrue("There should be files or Dirs under nm_private when " + "container is launched", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0);

  // restart the NodeManager
  restartNM(MAX_TRIES);
  checkNumOfLocalDirs();

  // Each of the expected deletions must have been requested exactly once.
  verify(delService, times(1)).delete((String) isNull(), argThat(new PathInclude(ResourceLocalizationService.NM_PRIVATE_DIR + "_DEL_")));
  verify(delService, times(1)).delete((String) isNull(), argThat(new PathInclude(ContainerLocalizer.FILECACHE + "_DEL_")));
  verify(delService, times(1)).scheduleFileDeletionTask(argThat(new FileDeletionInclude(user, null, new String[] { destinationFile })));
  verify(delService, times(1)).scheduleFileDeletionTask(argThat(new FileDeletionInclude(null, ContainerLocalizer.USERCACHE + "_DEL_", new String[] {})));

  // restart the NodeManager again
  // this time usercache directory should be empty
  restartNM(MAX_TRIES);
  checkNumOfLocalDirs();
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class TestNodeManagerResync method testNMSentContainerStatusOnResync.
// This is to test when NM gets the resync response from last heart beat, it
// should be able to send the already-sent-via-last-heart-beat container
// statuses again when it re-register with RM.
//
// Assertions made inside the mocked resource tracker cannot fail the test
// directly, so they record failures in assertionFailedInThread, which is
// checked at the end of the test.
@Test
public void testNMSentContainerStatusOnResync() throws Exception {
  final ContainerStatus testCompleteContainer =
      TestNodeStatusUpdater.createContainerStatus(2, ContainerState.COMPLETE);
  final Container container = TestNodeStatusUpdater.getMockContainer(testCompleteContainer);
  NMContainerStatus report = createNMContainerStatus(2, ContainerState.COMPLETE);
  when(container.getNMContainerStatus()).thenReturn(report);

  NodeManager nm = new NodeManager() {
    // Number of registerNodeManager calls seen so far.
    int registerCount = 0;

    @Override
    protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
      return new TestNodeStatusUpdaterResync(context, dispatcher, healthChecker, metrics) {
        @Override
        protected ResourceTracker createResourceTracker() {
          return new MockResourceTracker() {
            @Override
            public RegisterNodeManagerResponse registerNodeManager(RegisterNodeManagerRequest request) throws YarnException, IOException {
              if (registerCount == 0) {
                // first register, no containers info.
                try {
                  Assert.assertEquals(0, request.getNMContainerStatuses().size());
                } catch (AssertionError error) {
                  error.printStackTrace();
                  assertionFailedInThread.set(true);
                }
                // put the completed container into the context
                getNMContext().getContainers().put(testCompleteContainer.getContainerId(), container);
                getNMContext().getApplications().put(testCompleteContainer.getContainerId().getApplicationAttemptId().getApplicationId(), mock(Application.class));
              } else {
                // second register contains the completed container info.
                List<NMContainerStatus> statuses = request.getNMContainerStatuses();
                try {
                  Assert.assertEquals(1, statuses.size());
                  Assert.assertEquals(testCompleteContainer.getContainerId(), statuses.get(0).getContainerId());
                } catch (AssertionError error) {
                  error.printStackTrace();
                  assertionFailedInThread.set(true);
                }
              }
              registerCount++;
              return super.registerNodeManager(request);
            }

            @Override
            public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) {
              // first heartBeat contains the completed container info
              List<ContainerStatus> statuses = request.getNodeStatus().getContainersStatuses();
              try {
                Assert.assertEquals(1, statuses.size());
                Assert.assertEquals(testCompleteContainer.getContainerId(), statuses.get(0).getContainerId());
              } catch (AssertionError error) {
                error.printStackTrace();
                assertionFailedInThread.set(true);
              }
              // notify RESYNC on first heartbeat.
              return YarnServerBuilderUtils.newNodeHeartbeatResponse(1, NodeAction.RESYNC, null, null, null, null, 1000L);
            }
          };
        }
      };
    }
  };

  YarnConfiguration conf = createNMConfig();
  nm.init(conf);
  nm.start();
  try {
    try {
      syncBarrier.await();
    } catch (BrokenBarrierException ignored) {
      // Deliberately ignored: a broken barrier is not itself a failure here;
      // the flag checked below decides the outcome of the test.
    }
    Assert.assertFalse(assertionFailedInThread.get());
  } finally {
    // Always stop the NodeManager, even when the assertion above fails, so a
    // running instance does not leak into subsequent tests.
    nm.stop();
  }
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container in project hadoop by apache.
the class ContainerExecutor method reacquireContainer.
/**
 * Recover an already existing container. This is a blocking call and returns
 * only when the container exits. Note that the container must have been
 * activated prior to this call.
 *
 * <p>The method polls once per second until the container is no longer
 * alive, then waits up to roughly two seconds (in 100 ms steps) for the exit
 * code file derived from the pid file path to appear, and finally parses the
 * exit code from that file.
 *
 * @param ctx encapsulates information necessary to reacquire container
 * @return The exit code of the pre-existing container, or the
 *     {@code ExitCode.TERMINATED} exit code if the container has no pid file
 *     path or is deactivated while waiting for the exit code file
 * @throws IOException if there is a failure while reacquiring the container:
 *     the pid cannot be determined, the exit code file does not appear in
 *     time, or its content is not an integer
 * @throws InterruptedException if interrupted while waiting to reacquire
 *     the container
 */
public int reacquireContainer(ContainerReacquisitionContext ctx) throws IOException, InterruptedException {
  Container container = ctx.getContainer();
  String user = ctx.getUser();
  ContainerId containerId = ctx.getContainerId();

  Path pidPath = getPidFilePath(containerId);
  if (pidPath == null) {
    LOG.warn(containerId + " is not active, returning terminated error");
    return ExitCode.TERMINATED.getExitCode();
  }

  String pid = ProcessIdFileReader.getProcessId(pidPath);
  if (pid == null) {
    throw new IOException("Unable to determine pid for " + containerId);
  }

  LOG.info("Reacquiring " + containerId + " with pid " + pid);
  ContainerLivenessContext livenessContext = new ContainerLivenessContext.Builder()
      .setContainer(container)
      .setUser(user)
      .setPid(pid)
      .build();

  // Block until the container process is no longer alive.
  while (isContainerAlive(livenessContext)) {
    Thread.sleep(1000);
  }

  // wait for exit code file to appear
  final int sleepMsec = 100;
  int msecLeft = 2000;
  String exitCodeFile = ContainerLaunch.getExitCodeFile(pidPath.toString());
  File file = new File(exitCodeFile);
  while (!file.exists() && msecLeft >= 0) {
    if (!isContainerActive(containerId)) {
      LOG.info(containerId + " was deactivated");
      return ExitCode.TERMINATED.getExitCode();
    }
    Thread.sleep(sleepMsec);
    msecLeft -= sleepMsec;
  }

  // Time out only if the file is still missing. Testing the remaining budget
  // instead would reject a file that appeared during the final sleep, after
  // the budget had already dropped below zero.
  if (!file.exists()) {
    throw new IOException("Timeout while waiting for exit code from " + containerId);
  }

  try {
    return Integer.parseInt(FileUtils.readFileToString(file).trim());
  } catch (NumberFormatException e) {
    throw new IOException("Error parsing exit code from pid " + pid, e);
  }
}
Aggregations