use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.
the class TestContainerManagerRecovery method getContainerStatus.
/**
 * Fetches the status of a single container from the container manager while
 * running as a remote user named after the container's application attempt.
 * The user carries an NM token identifier built from the context's node id
 * and the current key id of the context's NM token secret manager.
 *
 * @param context source of the node id and the NM token secret manager
 * @param cm container manager that is queried
 * @param cid id of the container whose status is requested
 * @return the first status entry of the container manager's response
 * @throws Exception if the privileged status query fails
 */
private ContainerStatus getContainerStatus(Context context, final ContainerManagerImpl cm, ContainerId cid) throws Exception {
    // The request only depends on the container id, so build it up front.
    List<ContainerId> requestedIds = new ArrayList<>();
    requestedIds.add(cid);
    final GetContainerStatusesRequest statusRequest = GetContainerStatusesRequest.newInstance(requestedIds);

    // The remote user is named after the application attempt of the container.
    String attemptName = cid.getApplicationAttemptId().toString();
    UserGroupInformation attemptUser = UserGroupInformation.createRemoteUser(attemptName);
    String shortName = attemptUser.getShortUserName();
    NMTokenIdentifier tokenId = new NMTokenIdentifier(
        cid.getApplicationAttemptId(),
        context.getNodeId(),
        shortName,
        context.getNMTokenSecretManager().getCurrentKey().getKeyId());
    attemptUser.addTokenIdentifier(tokenId);

    PrivilegedExceptionAction<ContainerStatus> query = new PrivilegedExceptionAction<ContainerStatus>() {
        @Override
        public ContainerStatus run() throws Exception {
            return cm.getContainerStatuses(statusRequest).getContainerStatuses().get(0);
        }
    };
    return attemptUser.doAs(query);
}
use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.
the class TestContainerLaunch method internalKillTest.
/**
 * Launches a container running a long-lived script, stops it through the
 * container manager and verifies how it was terminated.
 *
 * <p>The delay between SIGTERM and SIGKILL is configured to 1000 ms when
 * {@code delayed} is true and to 0 otherwise. After the container reaches
 * {@code COMPLETE} its exit status must be
 * {@code ContainerExitStatus.KILLED_BY_APPMASTER}. On Windows, or when not
 * delayed, the container must no longer be alive; otherwise the script's
 * SIGTERM trap must have appended its message to the process start file.
 *
 * @param delayed whether a non-zero delay before SIGKILL is configured
 * @throws Exception if any step of the setup, launch or verification fails
 */
private void internalKillTest(boolean delayed) throws Exception {
    conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, delayed ? 1000 : 0);
    containerManager.start();
    // ////// Construct the Container-id
    ApplicationId appId = ApplicationId.newInstance(1, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cId = ContainerId.newContainerId(appAttemptId, 0);
    File processStartFile = new File(tmpDir, "pid.txt").getAbsoluteFile();
    // setup a script that can handle sigterm gracefully
    File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
    // try-with-resources closes the script file even if a write fails
    try (PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile))) {
        if (Shell.WINDOWS) {
            writer.println("@echo \"Running testscript for delayed kill\"");
            writer.println("@echo \"Writing pid to start file\"");
            writer.println("@echo " + cId + "> " + processStartFile);
            writer.println("@ping -n 100 127.0.0.1 >nul");
        } else {
            writer.println("#!/bin/bash\n\n");
            writer.println("echo \"Running testscript for delayed kill\"");
            writer.println("hello=\"Got SIGTERM\"");
            writer.println("umask 0");
            writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
            writer.println("echo \"Writing pid to start file\"");
            writer.println("echo $$ >> " + processStartFile);
            writer.println("while true; do\nsleep 1s;\ndone");
        }
    }
    FileUtil.setExecutable(scriptFile, true);
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // upload the script file so that the container can run it
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file.sh";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    // set up the rest of the container
    List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
    containerLaunchContext.setCommands(commands);
    Priority priority = Priority.newInstance(10);
    long createTime = 1234;
    Token containerToken = createContainerToken(cId, priority, createTime);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    // Poll once per second, giving up after about 20 attempts.
    int timeoutSecs = 0;
    while (!processStartFile.exists() && timeoutSecs++ < 20) {
        Thread.sleep(1000);
        LOG.info("Waiting for process start-file to be created");
    }
    Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
    NMContainerStatus nmContainerStatus = containerManager.getContext().getContainers().get(cId).getNMContainerStatus();
    Assert.assertEquals(priority, nmContainerStatus.getPriority());
    // Now test the stop functionality.
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cId);
    StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
    containerManager.stopContainers(stopRequest);
    BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
    // if delayed container stop sends a sigterm followed by a sigkill
    // otherwise sigkill is sent immediately
    GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
    ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
    Assert.assertEquals(ContainerExitStatus.KILLED_BY_APPMASTER, containerStatus.getExitStatus());
    // verify that the job object with ID matching container ID no longer exists.
    if (Shell.WINDOWS || !delayed) {
        Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(cId.toString()));
    } else {
        boolean foundSigTermMessage = false;
        // Close the reader before asserting so a failed assertion cannot leak it.
        try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains("SIGTERM")) {
                    foundSigTermMessage = true;
                    break;
                }
            }
        }
        Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
    }
}
use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.
the class TestContainerLaunch method testKillProcessGroup.
/**
 * Launches a container whose script forks a background child loop, kills
 * only the parent pid with {@code kill -9}, and verifies that the container
 * reaches {@code COMPLETE}, that the parent pid is no longer alive and that
 * the reported exit status equals {@code ExitCode.FORCE_KILLED}.
 *
 * <p>Skipped unless {@code Shell.isSetsidAvailable} is true.
 *
 * @throws Exception if any step of the setup, launch or verification fails
 */
@Test
public void testKillProcessGroup() throws Exception {
    Assume.assumeTrue(Shell.isSetsidAvailable);
    containerManager.start();
    // Construct the Container-id
    ApplicationId appId = ApplicationId.newInstance(2, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cId = ContainerId.newContainerId(appAttemptId, 0);
    File processStartFile = new File(tmpDir, "pid.txt").getAbsoluteFile();
    File childProcessStartFile = new File(tmpDir, "child_pid.txt").getAbsoluteFile();
    // setup a script that can handle sigterm gracefully
    File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
    // try-with-resources closes the script file even if a write fails
    try (PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile))) {
        writer.println("#!/bin/bash\n\n");
        writer.println("echo \"Running testscript for forked process\"");
        writer.println("umask 0");
        writer.println("echo $$ >> " + processStartFile);
        writer.println("while true;\ndo sleep 1s;\ndone > /dev/null 2>&1 &");
        writer.println("echo $! >> " + childProcessStartFile);
        writer.println("while true;\ndo sleep 1s;\ndone");
    }
    FileUtil.setExecutable(scriptFile, true);
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // upload the script file so that the container can run it
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file.sh";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    // set up the rest of the container
    List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
    containerLaunchContext.setCommands(commands);
    Priority priority = Priority.newInstance(10);
    long createTime = 1234;
    Token containerToken = createContainerToken(cId, priority, createTime);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    // Poll once per second, giving up after about 20 attempts.
    // NOTE(review): only the parent's pid file is polled; the child's pid file
    // is assumed to be written by the time it is read below - confirm.
    int timeoutSecs = 0;
    while (!processStartFile.exists() && timeoutSecs++ < 20) {
        Thread.sleep(1000);
        LOG.info("Waiting for process start-file to be created");
    }
    Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
    final String pid;
    // Readers are scoped so that a failed read or assertion cannot leak them.
    try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
        // Get the pid of the process
        pid = reader.readLine().trim();
        // No more lines
        Assert.assertEquals(null, reader.readLine());
    }
    final String child;
    try (BufferedReader reader = new BufferedReader(new FileReader(childProcessStartFile))) {
        // Get the pid of the child process
        child = reader.readLine().trim();
        // No more lines
        Assert.assertEquals(null, reader.readLine());
    }
    LOG.info("Manually killing pid " + pid + ", but not child pid " + child);
    Shell.execCommand(new String[] { "kill", "-9", pid });
    BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
    Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(pid));
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cId);
    GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
    ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
    Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), containerStatus.getExitStatus());
}
use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.
the class BaseContainerManagerTest method waitForContainerState.
/**
 * Polls the container manager every two seconds until the container reports
 * one of the given states or the time budget is used up, then asserts that
 * the last observed state is one of the expected ones.
 *
 * <p>The first query is issued only after an initial two-second sleep, and
 * each poll counts as two seconds against {@code timeOutMax}.
 *
 * @param containerManager protocol endpoint that is queried for the status
 * @param containerID id of the container to watch
 * @param finalStates states that end the wait successfully
 * @param timeOutMax budget, in the two-second-per-poll units described above
 * @throws InterruptedException if the sleep between polls is interrupted
 * @throws YarnException declared for the status query
 * @throws IOException declared for the status query
 */
public static void waitForContainerState(ContainerManagementProtocol containerManager, ContainerId containerID, List<ContainerState> finalStates, int timeOutMax) throws InterruptedException, YarnException, IOException {
    HashSet<ContainerState> fStates = new HashSet<>(finalStates);

    List<ContainerId> watchedIds = new ArrayList<ContainerId>();
    watchedIds.add(containerID);
    GetContainerStatusesRequest statusRequest = GetContainerStatusesRequest.newInstance(watchedIds);

    ContainerStatus latest;
    int elapsedSecs = 0;
    while (true) {
        Thread.sleep(2000);
        latest = containerManager.getContainerStatuses(statusRequest).getContainerStatuses().get(0);
        LOG.info("Waiting for container to get into one of states " + fStates + ". Current state is " + latest.getState());
        elapsedSecs += 2;
        // Stop on a matching state or once the budget is exhausted.
        if (fStates.contains(latest.getState()) || elapsedSecs >= timeOutMax) {
            break;
        }
    }

    LOG.info("Container state is " + latest.getState());
    Assert.assertTrue("ContainerState is not correct (timedout)", fStates.contains(latest.getState()));
}
use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.
the class TestContainerManager method testContainerManagerInitialization.
/**
 * Starts the container manager, checks that the node id's host equals the
 * local canonical host name (when that name differs from the raw IP
 * address), and verifies that a status query for a non-existing container
 * results in a thrown error.
 *
 * @throws IOException declared for the local host lookup
 */
@Test
public void testContainerManagerInitialization() throws IOException {
    containerManager.start();

    InetAddress localHost = InetAddress.getLocalHost();
    String canonicalName = localHost.getCanonicalHostName();
    boolean resolvedToName = !localHost.getHostAddress().equals(canonicalName);
    if (resolvedToName) {
        // only check if fqdn is not same as ip
        // api returns ip in case of resolution failure
        Assert.assertEquals(canonicalName, context.getNodeId().getHost());
    }

    // Just do a query for a non-existing container.
    boolean queryFailed;
    try {
        ContainerId missingId = createContainerId(0);
        List<ContainerId> queriedIds = new ArrayList<>();
        queriedIds.add(missingId);
        GetContainerStatusesResponse statuses =
            containerManager.getContainerStatuses(GetContainerStatusesRequest.newInstance(queriedIds));
        // A failure recorded for the id is turned back into a thrown error.
        if (statuses.getFailedRequests().containsKey(missingId)) {
            throw statuses.getFailedRequests().get(missingId).deSerialize();
        }
        queryFailed = false;
    } catch (Throwable expected) {
        queryFailed = true;
    }
    Assert.assertTrue(queryFailed);
}
Aggregations