Search in sources :

Example 11 with StopContainersRequest

Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project hadoop by apache.

In the class TestContainerManager, method testMultipleContainersStopAndGetStatus.

/**
 * Starts ten containers in one request, then queries the status of all of them and
 * finally stops all of them, checking each time that exactly the five odd-numbered
 * containers are served and the five even-numbered ones are reported as failed requests.
 *
 * @throws Exception if any container manager call or the wait in between fails
 */
@Test
public void testMultipleContainersStopAndGetStatus() throws Exception {
    containerManager.start();

    ContainerLaunchContext launchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    List<StartContainerRequest> startRequests = new ArrayList<>();
    List<ContainerId> allIds = new ArrayList<>();
    for (int idx = 0; idx < 10; idx++) {
        // Containers with even id belong to an unauthorized app
        ContainerId id = (idx % 2 == 0) ? createContainerId(idx, 1) : createContainerId(idx, 0);
        Token token = createContainerToken(id, DUMMY_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager());
        startRequests.add(StartContainerRequest.newInstance(launchContext, token));
        allIds.add(id);
    }

    // Launch everything in a single request, then give the containers time to come up.
    containerManager.startContainers(StartContainersRequest.newInstance(startRequests));
    Thread.sleep(5000);

    // Status query: odd ids are answered, even ids are rejected.
    GetContainerStatusesResponse statuses = containerManager.getContainerStatuses(GetContainerStatusesRequest.newInstance(allIds));
    Assert.assertEquals(5, statuses.getContainerStatuses().size());
    for (ContainerStatus reported : statuses.getContainerStatuses()) {
        ContainerId reportedId = reported.getContainerId();
        Assert.assertEquals(1, reportedId.getContainerId() & 1);
    }
    Assert.assertEquals(5, statuses.getFailedRequests().size());
    for (Map.Entry<ContainerId, SerializedException> rejected : statuses.getFailedRequests().entrySet()) {
        ContainerId rejectedId = rejected.getKey();
        Assert.assertEquals(0, rejectedId.getContainerId() & 1);
        Assert.assertTrue(rejected.getValue().getMessage().contains("attempted to get status for non-application container"));
    }

    // Stop request: same split between odd (stopped) and even (rejected) ids.
    StopContainersResponse stopped = containerManager.stopContainers(StopContainersRequest.newInstance(allIds));
    Assert.assertEquals(5, stopped.getSuccessfullyStoppedContainers().size());
    for (ContainerId stoppedId : stopped.getSuccessfullyStoppedContainers()) {
        Assert.assertEquals(1, stoppedId.getContainerId() & 1);
    }
    Assert.assertEquals(5, stopped.getFailedRequests().size());
    for (Map.Entry<ContainerId, SerializedException> rejected : stopped.getFailedRequests().entrySet()) {
        ContainerId rejectedId = rejected.getKey();
        Assert.assertEquals(0, rejectedId.getContainerId() & 1);
        Assert.assertTrue(rejected.getValue().getMessage().contains("attempted to stop non-application container"));
    }
}
Also used : StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) SerializedException(org.apache.hadoop.yarn.api.records.SerializedException) ArrayList(java.util.ArrayList) Token(org.apache.hadoop.yarn.api.records.Token) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) GetContainerStatusesResponse(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Map(java.util.Map) HashMap(java.util.HashMap) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) StopContainersResponse(org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse) Test(org.junit.Test)

Example 12 with StopContainersRequest

Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project hadoop by apache.

In the class TestContainerSchedulerQueuing, method testStopQueuedContainer.

/**
 * Start running one GUARANTEED container and queue two OPPORTUNISTIC ones.
 * Try killing one of the two queued containers.
 *
 * <p>After the stop request the test polls the container statuses until one container
 * has been seen in each of the RUNNING, SCHEDULED and COMPLETE states, and then checks
 * which container ended up in which state.
 *
 * @throws Exception if any container manager call or one of the waits fails
 */
@Test
public void testStopQueuedContainer() throws Exception {
    containerManager.start();
    ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
    List<StartContainerRequest> list = new ArrayList<>();
    list.add(StartContainerRequest.newInstance(containerLaunchContext,
        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
            BuilderUtils.newResource(2048, 1), context.getContainerTokenSecretManager(), null,
            ExecutionType.GUARANTEED)));
    list.add(StartContainerRequest.newInstance(containerLaunchContext,
        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
            BuilderUtils.newResource(512, 1), context.getContainerTokenSecretManager(), null,
            ExecutionType.OPPORTUNISTIC)));
    list.add(StartContainerRequest.newInstance(containerLaunchContext,
        createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
            BuilderUtils.newResource(512, 1), context.getContainerTokenSecretManager(), null,
            ExecutionType.OPPORTUNISTIC)));
    StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
    containerManager.startContainers(allRequests);
    Thread.sleep(2000);
    // Assert there is initially one container running and two queued.
    int runningContainersNo = 0;
    int queuedContainersNo = 0;
    List<ContainerId> statList = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        statList.add(createContainerId(i));
    }
    GetContainerStatusesRequest statRequest = GetContainerStatusesRequest.newInstance(statList);
    List<ContainerStatus> containerStatuses = containerManager.getContainerStatuses(statRequest).getContainerStatuses();
    for (ContainerStatus status : containerStatuses) {
        if (status.getState() == org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) {
            runningContainersNo++;
        } else if (status.getState() == org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) {
            queuedContainersNo++;
        }
        System.out.println("\nStatus : [" + status + "]\n");
    }
    Assert.assertEquals(1, runningContainersNo);
    Assert.assertEquals(2, queuedContainersNo);
    // Stop one of the two queued containers.
    StopContainersRequest stopRequest = StopContainersRequest.newInstance(Arrays.asList(createContainerId(1)));
    containerManager.stopContainers(stopRequest);
    Thread.sleep(2000);
    // Assert queued container got properly stopped.
    statList.clear();
    for (int i = 0; i < 3; i++) {
        statList.add(createContainerId(i));
    }
    statRequest = GetContainerStatusesRequest.newInstance(statList);
    HashMap<org.apache.hadoop.yarn.api.records.ContainerState, ContainerStatus> map = new HashMap<>();
    // Poll until every expected state has been observed. The flag ends BOTH loops: a plain
    // break would only leave the inner loop, and the remaining polls could then overwrite
    // the states that were already captured.
    boolean allStatesSeen = false;
    for (int i = 0; i < 10 && !allStatesSeen; i++) {
        containerStatuses = containerManager.getContainerStatuses(statRequest).getContainerStatuses();
        for (ContainerStatus status : containerStatuses) {
            System.out.println("\nStatus : [" + status + "]\n");
            map.put(status.getState(), status);
            allStatesSeen = map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING)
                && map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED)
                && map.containsKey(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE);
            if (allStatesSeen) {
                break;
            }
            Thread.sleep(1000);
        }
    }
    // Fail with a readable message, rather than a NullPointerException, when a state never showed up.
    ContainerStatus running = map.get(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING);
    ContainerStatus complete = map.get(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE);
    ContainerStatus scheduled = map.get(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED);
    Assert.assertNotNull("No container was observed in state RUNNING", running);
    Assert.assertNotNull("No container was observed in state COMPLETE", complete);
    Assert.assertNotNull("No container was observed in state SCHEDULED", scheduled);
    Assert.assertEquals(createContainerId(0), running.getContainerId());
    Assert.assertEquals(createContainerId(1), complete.getContainerId());
    Assert.assertEquals(createContainerId(2), scheduled.getContainerId());
}
Also used : StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) GetContainerStatusesRequest(org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) ContainerState(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Example 13 with StopContainersRequest

Use of org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest in project flink by apache.

In the class YARNSessionCapacitySchedulerITCase, method testTaskManagerFailure.

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>start a YARN session and check the JobManager web interface (TaskManager list,
 *       configuration values, host/port, log access);</li>
 *   <li>locate the container whose launch command mentions the TaskManager class and stop it
 *       through its NodeManager's container manager;</li>
 *   <li>wait until the captured stderr shows a container being killed and, after that,
 *       a new TaskManager being launched;</li>
 *   <li>stop the session and verify the captured output.</li>
 * </ol>
 */
// timeout after 100 seconds
@Test(timeout = 100000)
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(new String[] { "-j", flinkUberjar.getAbsolutePath(), "-t", flinkLibFolder.getAbsolutePath(), "-n", "1", "-jm", "768", "-tm", "1024",
        // set the slots 3 to check if the vCores are set properly!
        "-s", "3", "-nm", "customName", "-Dfancy-configuration-value=veryFancy", "-Dyarn.maximum-failed-containers=3", "-D" + ConfigConstants.YARN_VCORES + "=2" }, "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);
    Assert.assertEquals(2, getRunningContainers());
    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // Normalize the tracking URL so relative paths can simply be appended.
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);
        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());
        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));
        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // If the line was printed more than once, the last occurrence wins.
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);
        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));
        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    } finally {
        // The client is not used past this point; stop it here so that it is released
        // even when one of the checks above fails.
        if (yc != null) {
            yc.stop();
        }
    }
    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    try {
        for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
            NodeManager nm = yarnCluster.getNodeManager(nmId);
            ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
            for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
                String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
                if (command.contains(YarnTaskManager.class.getSimpleName())) {
                    taskManagerContainer = entry.getKey();
                    nodeManager = nm;
                    nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                    // allow myself to do stuff with the container
                    remoteUgi.addTokenIdentifier(nmIdent);
                }
            }
            sleep(500);
        }
        Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
        Assert.assertNotNull("Illegal state", nodeManager);
        List<ContainerId> toStop = new LinkedList<ContainerId>();
        toStop.add(taskManagerContainer);
        StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
        try {
            nodeManager.getNMContext().getContainerManager().stopContainers(scr);
        } catch (Throwable e) {
            LOG.warn("Error stopping container", e);
            Assert.fail("Error stopping container: " + e.getMessage());
        }
        // stateful termination check:
        // wait until we saw a container being killed and AFTERWARDS a new one launched
        // (the loop itself is unbounded; the @Test timeout is what ends it on failure)
        boolean ok = false;
        do {
            LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
            String o = errContent.toString();
            int killedOff = o.indexOf("Container killed by the ApplicationMaster");
            if (killedOff != -1) {
                // Only look at output written after the kill message.
                o = o.substring(killedOff);
                ok = o.indexOf("Launching TaskManager") > 0;
            }
            sleep(1000);
        } while (!ok);
        // send "stop" command to command line interface
        runner.sendStop();
        // wait for the thread to stop
        try {
            runner.join(1000);
        } catch (InterruptedException e) {
            LOG.warn("Interrupted while stopping runner", e);
            // Keep the interrupt visible to whoever runs this thread.
            Thread.currentThread().interrupt();
        }
        LOG.warn("stopped");
        // ----------- Send output to logger
        System.setOut(originalStdout);
        System.setErr(originalStderr);
        String oC = outContent.toString();
        String eC = errContent.toString();
        LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
        LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);
        // ------ Check if everything happened correctly
        Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
        Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
        Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));
    } finally {
        // cleanup auth for the subsequent tests, also when a check above failed.
        if (nmIdent != null) {
            remoteUgi.getTokenIdentifiers().remove(nmIdent);
        }
    }
    LOG.info("Finished testTaskManagerFailure()");
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) Matcher(java.util.regex.Matcher) JsonNode(com.fasterxml.jackson.databind.JsonNode) UtilsTest.checkForLogString(org.apache.flink.yarn.UtilsTest.checkForLogString) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) Pattern(java.util.regex.Pattern) IOException(java.io.IOException) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) NodeManager(org.apache.hadoop.yarn.server.nodemanager.NodeManager) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Aggregations

StopContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest)13 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)12 ArrayList (java.util.ArrayList)10 StartContainerRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest)8 StartContainersRequest (org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)8 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)8 GetContainerStatusesRequest (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest)7 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)6 Test (org.junit.Test)6 HashMap (java.util.HashMap)5 Path (org.apache.hadoop.fs.Path)4 StopContainersResponse (org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse)4 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)4 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)4 BufferedReader (java.io.BufferedReader)3 File (java.io.File)3 FileReader (java.io.FileReader)3 IOException (java.io.IOException)3 PrintWriter (java.io.PrintWriter)3 GetContainerStatusesResponse (org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse)3