Search in sources:

Example 16 with NMTokenIdentifier

use of org.apache.hadoop.yarn.security.NMTokenIdentifier in project hadoop by apache.

the class TestContainerManagerRecovery method getContainerStatus.

/**
 * Fetches the status of a single container from the given container manager.
 *
 * <p>The call is issued as a remote user named after the container's application attempt; that
 * user carries an {@link NMTokenIdentifier} built from the context's node id and the current NM
 * token master key id.
 *
 * @param context node manager context supplying the node id and the NM token secret manager
 * @param cm container manager to query
 * @param cid id of the container whose status is requested
 * @return the first status entry of the container manager's response
 * @throws Exception if building the request or the privileged call fails
 */
private ContainerStatus getContainerStatus(Context context, final ContainerManagerImpl cm, ContainerId cid) throws Exception {
    // Single-element request for the container of interest.
    List<ContainerId> requestedIds = new ArrayList<>();
    requestedIds.add(cid);
    final GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(requestedIds);

    // Remote user named after the application attempt, authorised through an NM token identifier.
    UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(cid.getApplicationAttemptId().toString());
    remoteUser.addTokenIdentifier(new NMTokenIdentifier(
        cid.getApplicationAttemptId(),
        context.getNodeId(),
        remoteUser.getShortUserName(),
        context.getNMTokenSecretManager().getCurrentKey().getKeyId()));

    PrivilegedExceptionAction<ContainerStatus> statusQuery = new PrivilegedExceptionAction<ContainerStatus>() {

        @Override
        public ContainerStatus run() throws Exception {
            return cm.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
        }
    };
    return remoteUser.doAs(statusQuery);
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) ArrayList(java.util.ArrayList) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) UnsupportedFileSystemException(org.apache.hadoop.fs.UnsupportedFileSystemException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 17 with NMTokenIdentifier

use of org.apache.hadoop.yarn.security.NMTokenIdentifier in project hadoop by apache.

the class TestNMProxy method getNMProxy.

/**
 * Creates a {@link ContainerManagementProtocol} proxy to the node manager address configured in
 * {@code conf}.
 *
 * <p>An NM token is created for attempt 1 of application (1, 1), converted to a security token
 * bound to the node manager address, and attached to a remote user that the proxy is created for.
 *
 * @param conf configuration providing the node manager address and the RPC settings
 * @return the proxy created by {@code NMProxy.createNMProxy}
 */
private ContainerManagementProtocol getNMProxy(Configuration conf) {
    ApplicationAttemptId attempt = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1);
    org.apache.hadoop.yarn.api.records.Token yarnToken =
        context.getNMTokenSecretManager().createNMToken(attempt, context.getNodeId(), user);

    final InetSocketAddress nmAddress = conf.getSocketAddr(
        YarnConfiguration.NM_BIND_HOST,
        YarnConfiguration.NM_ADDRESS,
        YarnConfiguration.DEFAULT_NM_ADDRESS,
        YarnConfiguration.DEFAULT_NM_PORT);

    // Convert the YARN record into a security token whose service is the node manager address.
    Token<NMTokenIdentifier> securityToken =
        ConverterUtils.convertFromYarn(yarnToken, SecurityUtil.buildTokenService(nmAddress));

    UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(user);
    remoteUser.addToken(securityToken);

    YarnRPC rpc = YarnRPC.create(conf);
    return NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, remoteUser, rpc, nmAddress);
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) InetSocketAddress(java.net.InetSocketAddress) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 18 with NMTokenIdentifier

use of org.apache.hadoop.yarn.security.NMTokenIdentifier in project hadoop by apache.

the class TestContainerManager method createContainerManager.

/**
 * Builds the container manager used by this test.
 *
 * <p>The returned {@link ContainerManagerImpl} ignores requests to block new containers and
 * reports a fixed remote user (attempt 1 of application (0, 0)) that carries an NM token
 * identifier for the current master key.
 *
 * @param delSrvc deletion service handed to the container manager
 * @return the customised container manager
 */
@Override
protected ContainerManagerImpl createContainerManager(DeletionService delSrvc) {
    return new ContainerManagerImpl(context, exec, delSrvc, nodeStatusUpdater, metrics, dirsHandler) {

        @Override
        public void setBlockNewContainerRequests(boolean blockNewContainerRequests) {
            // intentionally a no-op
        }

        @Override
        protected UserGroupInformation getRemoteUgi() throws YarnException {
            ApplicationAttemptId attempt = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 1);
            UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(attempt.toString());
            NMTokenIdentifier nmTokenId = new NMTokenIdentifier(
                attempt,
                context.getNodeId(),
                user,
                context.getNMTokenSecretManager().getCurrentKey().getKeyId());
            remoteUser.addTokenIdentifier(nmTokenId);
            return remoteUser;
        }
    };
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 19 with NMTokenIdentifier

use of org.apache.hadoop.yarn.security.NMTokenIdentifier in project hadoop by apache.

the class TestContainerSchedulerQueuing method createContainerManager.

/**
 * Builds the container manager used by this test.
 *
 * <p>The returned {@link ContainerManagerImpl} ignores requests to block new containers, reports
 * a fixed remote user (attempt 1 of application (0, 0)) carrying an NM token identifier, and uses
 * a containers monitor with hard-coded resource limits.
 *
 * @param delSrvc deletion service handed to the container manager
 * @return the customised container manager
 */
@Override
protected ContainerManagerImpl createContainerManager(DeletionService delSrvc) {
    return new ContainerManagerImpl(context, exec, delSrvc, nodeStatusUpdater, metrics, dirsHandler) {

        @Override
        public void setBlockNewContainerRequests(boolean blockNewContainerRequests) {
            // intentionally a no-op
        }

        @Override
        protected UserGroupInformation getRemoteUgi() throws YarnException {
            ApplicationAttemptId attempt = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 1);
            UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(attempt.toString());
            NMTokenIdentifier nmTokenId = new NMTokenIdentifier(
                attempt,
                context.getNodeId(),
                user,
                context.getNMTokenSecretManager().getCurrentKey().getKeyId());
            remoteUser.addTokenIdentifier(nmTokenId);
            return remoteUser;
        }

        @Override
        protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) {
            // Monitor whose resource limits for containers are fixed by the overrides below.
            return new ContainersMonitorImpl(exec, dispatcher, this.context) {

                @Override
                public long getPmemAllocatedForContainers() {
                    return 2048 * 1024 * 1024L;
                }

                @Override
                public long getVmemAllocatedForContainers() {
                    // Virtual memory is the physical limit scaled by the configured vmem/pmem ratio.
                    float ratio = getConfig().getFloat(
                        YarnConfiguration.NM_VMEM_PMEM_RATIO,
                        YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
                    return (long) (ratio * getPmemAllocatedForContainers());
                }

                @Override
                public long getVCoresAllocatedForContainers() {
                    return 4;
                }
            };
        }
    };
}
Also used : ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) DefaultContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor) ContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor) ContainersMonitorImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 20 with NMTokenIdentifier

use of org.apache.hadoop.yarn.security.NMTokenIdentifier in project flink by apache.

the class YARNSessionCapacitySchedulerITCase method testTaskManagerFailure.

/**
 * Tests TaskManager failure handling and checks that the vcores are set correctly (see issue
 * FLINK-2213).
 *
 * <p>The test starts a YARN session with one TaskManager, verifies the JobManager web interface
 * (TaskManager list, dynamic properties, hostname/port, log access), stops the container that
 * runs the TaskManager, and then waits until the captured stderr shows a container being killed
 * followed by a new TaskManager being launched.
 */
// timeout after 100 seconds
@Test(timeout = 100000)
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024",
        // set the slots to 3 to check if the vCores are set properly!
        "-s", "3", "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" }, "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);
    Assert.assertEquals(2, getRunningContainers());
    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // Normalise the tracking URL so that relative paths can simply be appended to it.
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);
        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());
        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));
        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // Keep the last match found in the captured stdout.
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);
        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));
        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    } finally {
        // The client is only used for the web interface checks above; stop it on every path so
        // that a failed assertion does not leave it running.
        if (yc != null) {
            yc.stop();
        }
    }
    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            // A container is identified as the TaskManager by its launch command line.
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }
    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);
    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }
    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    // (the loop has no bound of its own; the @Test timeout ends it if the events never show up)
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // Only look at output that follows the kill message.
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);
    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
        // Restore the interrupt flag so the test framework can still observe the interruption.
        Thread.currentThread().interrupt();
    }
    LOG.warn("stopped");
    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);
    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));
    // cleanup auth for the subsequent tests.
    // NOTE(review): getTokenIdentifiers() may return a copy rather than a live view of the user's
    // credentials, in which case this removal has no effect - confirm against UserGroupInformation.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);
    LOG.info("Finished testTaskManagerFailure()");
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) Matcher(java.util.regex.Matcher) JsonNode(com.fasterxml.jackson.databind.JsonNode) UtilsTest.checkForLogString(org.apache.flink.yarn.UtilsTest.checkForLogString) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) Pattern(java.util.regex.Pattern) IOException(java.io.IOException) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) NodeManager(org.apache.hadoop.yarn.server.nodemanager.NodeManager) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Aggregations

NMTokenIdentifier (org.apache.hadoop.yarn.security.NMTokenIdentifier)20 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)16 ArrayList (java.util.ArrayList)10 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)10 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)9 IOException (java.io.IOException)7 HashMap (java.util.HashMap)6 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)6 StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest)5 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)5 ContainerTokenIdentifier (org.apache.hadoop.yarn.security.ContainerTokenIdentifier)5 InvalidToken (org.apache.hadoop.security.token.SecretManager.InvalidToken)4 NodeId (org.apache.hadoop.yarn.api.records.NodeId)4 SerializedException (org.apache.hadoop.yarn.api.records.SerializedException)4 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)4 UnsupportedFileSystemException (org.apache.hadoop.fs.UnsupportedFileSystemException)3 GetContainerStatusesRequest (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest)3 StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)3 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)3 Test (org.junit.Test)3