Search in sources:

Example 16 with GetContainerStatusesRequest

use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.

the class TestContainerManagerRecovery method getContainerStatus.

/**
 * Fetches the status of one container from the given container manager, issuing the call
 * as a remote user that carries an NM token identifier.
 *
 * @param context supplies the node id and the NM token secret manager used to build the token identifier
 * @param cm container manager that is queried
 * @param cid id of the container whose status is requested
 * @return the first entry of the status list returned by the container manager
 * @throws Exception whatever the privileged action or token setup throws
 */
private ContainerStatus getContainerStatus(Context context, final ContainerManagerImpl cm, ContainerId cid) throws Exception {
    // The remote user's name is the string form of the container's application attempt id.
    UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(cid.getApplicationAttemptId().toString());
    // Build the token identifier from the attempt id, node id, user name and current key id.
    NMTokenIdentifier tokenId = new NMTokenIdentifier(
            cid.getApplicationAttemptId(),
            context.getNodeId(),
            remoteUser.getShortUserName(),
            context.getNMTokenSecretManager().getCurrentKey().getKeyId());
    remoteUser.addTokenIdentifier(tokenId);

    // Single-element request for just this container.
    List<ContainerId> requestedIds = new ArrayList<>();
    requestedIds.add(cid);
    final GetContainerStatusesRequest statusRequest = GetContainerStatusesRequest.newInstance(requestedIds);

    PrivilegedExceptionAction<ContainerStatus> query = new PrivilegedExceptionAction<ContainerStatus>() {

        @Override
        public ContainerStatus run() throws Exception {
            List<ContainerStatus> statuses = cm.getContainerStatuses(statusRequest).getContainerStatuses();
            return statuses.get(0);
        }
    };
    return remoteUser.doAs(query);
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) UnsupportedFileSystemException(org.apache.hadoop.fs.UnsupportedFileSystemException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 17 with GetContainerStatusesRequest

use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.

the class TestContainerLaunch method internalKillTest.

/**
 * Starts a container that runs a long-lived script, stops it through the container manager,
 * and verifies how the container was terminated.
 *
 * <p>On non-Windows platforms the script traps SIGTERM and appends a marker line to the
 * process start file; when {@code delayed} is true the test looks for that marker.
 *
 * @param delayed when true, {@code NM_SLEEP_DELAY_BEFORE_SIGKILL_MS} is set to 1000 ms,
 *                otherwise to 0
 * @throws Exception if container setup, start/stop, or file access fails
 */
private void internalKillTest(boolean delayed) throws Exception {
    conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, delayed ? 1000 : 0);
    containerManager.start();
    // Construct the Container-id
    ApplicationId appId = ApplicationId.newInstance(1, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cId = ContainerId.newContainerId(appAttemptId, 0);
    File processStartFile = new File(tmpDir, "pid.txt").getAbsoluteFile();
    // setup a script that can handle sigterm gracefully
    File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
    // try-with-resources so the script file handle is released even if writing fails
    try (PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile))) {
        if (Shell.WINDOWS) {
            writer.println("@echo \"Running testscript for delayed kill\"");
            writer.println("@echo \"Writing pid to start file\"");
            writer.println("@echo " + cId + "> " + processStartFile);
            writer.println("@ping -n 100 127.0.0.1 >nul");
        } else {
            writer.println("#!/bin/bash\n\n");
            writer.println("echo \"Running testscript for delayed kill\"");
            writer.println("hello=\"Got SIGTERM\"");
            writer.println("umask 0");
            writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
            writer.println("echo \"Writing pid to start file\"");
            writer.println("echo $$ >> " + processStartFile);
            writer.println("while true; do\nsleep 1s;\ndone");
        }
    }
    FileUtil.setExecutable(scriptFile, true);
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // upload the script file so that the container can run it
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file.sh";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    // set up the rest of the container
    List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
    containerLaunchContext.setCommands(commands);
    Priority priority = Priority.newInstance(10);
    long createTime = 1234;
    Token containerToken = createContainerToken(cId, priority, createTime);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    // Poll once per second, up to 20 times, for the script to create its start file.
    int timeoutSecs = 0;
    while (!processStartFile.exists() && timeoutSecs++ < 20) {
        Thread.sleep(1000);
        LOG.info("Waiting for process start-file to be created");
    }
    Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
    NMContainerStatus nmContainerStatus = containerManager.getContext().getContainers().get(cId).getNMContainerStatus();
    Assert.assertEquals(priority, nmContainerStatus.getPriority());
    // Now test the stop functionality.
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cId);
    StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
    containerManager.stopContainers(stopRequest);
    BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
    // if delayed container stop sends a sigterm followed by a sigkill
    // otherwise sigkill is sent immediately
    GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
    ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
    Assert.assertEquals(ContainerExitStatus.KILLED_BY_APPMASTER, containerStatus.getExitStatus());
    // verify that the job object with ID matching container ID no longer exists.
    if (Shell.WINDOWS || !delayed) {
        Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(cId.toString()));
    } else {
        boolean foundSigTermMessage = false;
        // Close the reader before asserting so a failed assertion cannot leak the file handle.
        try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains("SIGTERM")) {
                    foundSigTermMessage = true;
                    break;
                }
            }
        }
        Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
    }
}
Also used : HashMap(java.util.HashMap) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) Token(org.apache.hadoop.yarn.api.records.Token) URL(org.apache.hadoop.yarn.api.records.URL) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) FileReader(java.io.FileReader) PrintWriter(java.io.PrintWriter) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) Priority(org.apache.hadoop.yarn.api.records.Priority) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) JarFile(java.util.jar.JarFile) File(java.io.File)

Example 18 with GetContainerStatusesRequest

use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.

the class TestContainerLaunch method testKillProcessGroup.

/**
 * Starts a container whose script forks a background loop, kills only the script's own pid
 * with {@code kill -9}, and checks that the container reaches COMPLETE with the
 * FORCE_KILLED exit code.
 *
 * <p>Skipped (via {@code Assume}) when {@code Shell.isSetsidAvailable} is false.
 *
 * @throws Exception if container setup, start, or file access fails
 */
@Test
public void testKillProcessGroup() throws Exception {
    Assume.assumeTrue(Shell.isSetsidAvailable);
    containerManager.start();
    // Construct the Container-id
    ApplicationId appId = ApplicationId.newInstance(2, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cId = ContainerId.newContainerId(appAttemptId, 0);
    File processStartFile = new File(tmpDir, "pid.txt").getAbsoluteFile();
    File childProcessStartFile = new File(tmpDir, "child_pid.txt").getAbsoluteFile();
    // setup a script that can handle sigterm gracefully
    File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
    // try-with-resources so the script file handle is released even if writing fails
    try (PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile))) {
        writer.println("#!/bin/bash\n\n");
        writer.println("echo \"Running testscript for forked process\"");
        writer.println("umask 0");
        writer.println("echo $$ >> " + processStartFile);
        writer.println("while true;\ndo sleep 1s;\ndone > /dev/null 2>&1 &");
        writer.println("echo $! >> " + childProcessStartFile);
        writer.println("while true;\ndo sleep 1s;\ndone");
    }
    FileUtil.setExecutable(scriptFile, true);
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    // upload the script file so that the container can run it
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file.sh";
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(destinationFile, rsrc_alpha);
    containerLaunchContext.setLocalResources(localResources);
    // set up the rest of the container
    List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
    containerLaunchContext.setCommands(commands);
    Priority priority = Priority.newInstance(10);
    long createTime = 1234;
    Token containerToken = createContainerToken(cId, priority, createTime);
    StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, containerToken);
    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(scRequest);
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    // Poll once per second, up to 20 times, for the script to create its start file.
    int timeoutSecs = 0;
    while (!processStartFile.exists() && timeoutSecs++ < 20) {
        Thread.sleep(1000);
        LOG.info("Waiting for process start-file to be created");
    }
    Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
    // Readers are scoped with try-with-resources so that a failed assertion or a
    // NullPointerException from an empty file cannot leak the file handle.
    String pid;
    try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
        // Get the pid of the process
        pid = reader.readLine().trim();
        // No more lines
        Assert.assertEquals(null, reader.readLine());
    }
    String child;
    try (BufferedReader reader = new BufferedReader(new FileReader(childProcessStartFile))) {
        // Get the pid of the child process
        child = reader.readLine().trim();
        // No more lines
        Assert.assertEquals(null, reader.readLine());
    }
    LOG.info("Manually killing pid " + pid + ", but not child pid " + child);
    Shell.execCommand(new String[] { "kill", "-9", pid });
    BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE);
    Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(pid));
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(cId);
    GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds);
    ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
    Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), containerStatus.getExitStatus());
}
Also used : HashMap(java.util.HashMap) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) Token(org.apache.hadoop.yarn.api.records.Token) URL(org.apache.hadoop.yarn.api.records.URL) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FileReader(java.io.FileReader) PrintWriter(java.io.PrintWriter) Path(org.apache.hadoop.fs.Path) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) Priority(org.apache.hadoop.yarn.api.records.Priority) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) JarFile(java.util.jar.JarFile) File(java.io.File) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Example 19 with GetContainerStatusesRequest

use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.

the class BaseContainerManagerTest method waitForContainerState.

/**
 * Polls the container manager every two seconds until the container reports one of the
 * given states or the time budget is used up, then asserts that an accepted state was reached.
 *
 * @param containerManager protocol endpoint queried for the container status
 * @param containerID id of the container being watched
 * @param finalStates states that end the wait successfully
 * @param timeOutMax upper bound, in seconds of accumulated sleep time, before polling stops
 * @throws InterruptedException if the sleeping thread is interrupted
 * @throws YarnException declared by this method's signature
 * @throws IOException declared by this method's signature
 */
public static void waitForContainerState(ContainerManagementProtocol containerManager, ContainerId containerID, List<ContainerState> finalStates, int timeOutMax) throws InterruptedException, YarnException, IOException {
    List<ContainerId> watchedIds = new ArrayList<ContainerId>();
    watchedIds.add(containerID);
    GetContainerStatusesRequest statusRequest = GetContainerStatusesRequest.newInstance(watchedIds);
    HashSet<ContainerState> fStates = new HashSet<>(finalStates);

    ContainerStatus latest;
    int elapsedSecs = 0;
    // Always sleeps and queries at least once before evaluating the exit conditions.
    while (true) {
        Thread.sleep(2000);
        latest = containerManager.getContainerStatuses(statusRequest).getContainerStatuses().get(0);
        LOG.info("Waiting for container to get into one of states " + fStates + ". Current state is " + latest.getState());
        elapsedSecs += 2;
        if (fStates.contains(latest.getState()) || elapsedSecs >= timeOutMax) {
            break;
        }
    }

    LOG.info("Container state is " + latest.getState());
    Assert.assertTrue("ContainerState is not correct (timedout)", fStates.contains(latest.getState()));
}
Also used : ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) ContainerState(org.apache.hadoop.yarn.api.records.ContainerState) HashSet(java.util.HashSet)

Example 20 with GetContainerStatusesRequest

use of org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest in project hadoop by apache.

the class TestContainerManager method testContainerManagerInitialization.

/**
 * Starts the container manager, checks the node id host against the local canonical host
 * name when that name differs from the IP address, and verifies that a status query for a
 * container id that was never started ends in a failure.
 *
 * @throws IOException if the local host address cannot be obtained
 */
@Test
public void testContainerManagerInitialization() throws IOException {
    containerManager.start();

    InetAddress localHost = InetAddress.getLocalHost();
    String canonicalName = localHost.getCanonicalHostName();
    // only check if fqdn is not same as ip
    // api returns ip in case of resolution failure
    boolean nameDiffersFromIp = !localHost.getHostAddress().equals(canonicalName);
    if (nameDiffersFromIp) {
        Assert.assertEquals(canonicalName, context.getNodeId().getHost());
    }

    // Just do a query for a non-existing container.
    boolean queryFailed = false;
    try {
        ContainerId unknownId = createContainerId(0);
        List<ContainerId> queriedIds = new ArrayList<>();
        queriedIds.add(unknownId);
        GetContainerStatusesResponse statusResponse =
                containerManager.getContainerStatuses(GetContainerStatusesRequest.newInstance(queriedIds));
        // A per-container failure is surfaced by rethrowing its deserialized exception.
        if (statusResponse.getFailedRequests().containsKey(unknownId)) {
            throw statusResponse.getFailedRequests().get(unknownId).deSerialize();
        }
    } catch (Throwable failure) {
        queryFailed = true;
    }
    Assert.assertTrue(queryFailed);
}
Also used : GetContainerStatusesResponse(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) InetAddress(java.net.InetAddress) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)24 GetContainerStatusesRequest (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest)24 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)24 StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest)20 StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)20 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)20 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)20 Test (org.junit.Test)16 HashMap (java.util.HashMap)11 BaseContainerManagerTest (org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest)11 Path (org.apache.hadoop.fs.Path)9 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)9 URL (org.apache.hadoop.yarn.api.records.URL)9 File (java.io.File)8 PrintWriter (java.io.PrintWriter)7 StopContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest)7 Token (org.apache.hadoop.yarn.api.records.Token)6 BufferedReader (java.io.BufferedReader)5 FileReader (java.io.FileReader)5 GetContainerStatusesResponse (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse)5