Search in sources :

Example 61 with YarnClient

Use of org.apache.hadoop.yarn.client.api.YarnClient in the Apache Flink project.

From the class AbstractYarnClusterDescriptor, method getYarnClient.

/**
 * Creates a new Hadoop YARN client, initializes it with {@code conf} and starts it.
 *
 * @return a started {@link YarnClient}; the caller has to shut it down manually
 */
protected YarnClient getYarnClient() {
    final YarnClient client = YarnClient.createYarnClient();
    client.init(conf);
    client.start();
    return client;
}
Also used : YarnClient(org.apache.hadoop.yarn.client.api.YarnClient)

Example 62 with YarnClient

Use of org.apache.hadoop.yarn.client.api.YarnClient in the Apache Flink project.

From the class YARNSessionFIFOITCase, method testDetachedMode.

/**
 * Test regular operation, including command line parameter parsing.
 *
 * <p>Starts a detached YARN session with a custom name, waits until two containers are
 * running, kills the application "externally" through a {@link YarnClient} and finally
 * removes the yarn-properties file.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting for the runner
 */
// timeout after a minute.
@Test(timeout = 60000)
public void testDetachedMode() throws InterruptedException {
    LOG.info("Starting testDetachedMode()");
    addTestAppender(FlinkYarnSessionCli.class, Level.INFO);
    Runner runner = startWithArgs(new String[] {
        "-j", flinkUberjar.getAbsolutePath(),
        "-t", flinkLibFolder.getAbsolutePath(),
        "-n", "1",
        "-jm", "768",
        "-tm", "1024",
        // test setting a custom name
        "--name", "MyCustomName",
        "--detached" }, "Flink JobManager is now running on", RunTypes.YARN_SESSION);
    // before checking any strings outputted by the CLI, first give it time to return
    runner.join();
    checkForLogString("The Flink YARN client has been started in detached mode");
    LOG.info("Waiting until two containers are running");
    // wait until two containers are running
    while (getRunningContainers() < 2) {
        sleep(500);
    }
    //additional sleep for the JM/TM to start and establish connection
    sleep(2000);
    LOG.info("Two containers are running. Killing the application");
    // kill application "externally".
    // Declared outside the try block so that the client can be stopped in the finally block.
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("MyCustomName", app.getName());
        ApplicationId id = app.getApplicationId();
        yc.killApplication(id);
        // poll until YARN reports at least one application in state KILLED
        while (yc.getApplications(EnumSet.of(YarnApplicationState.KILLED)).size() == 0) {
            sleep(500);
        }
    } catch (Throwable t) {
        LOG.warn("Killing failed", t);
        // carry the cause into the failure message so the test report is self-explanatory
        Assert.fail("Killing failed: " + t.getMessage());
    } finally {
        // always stop the client so it is not leaked, even when an assertion above failed;
        // best-effort so that a failing stop() does not skip the file cleanup below
        if (yc != null) {
            try {
                yc.stop();
            } catch (Exception e) {
                LOG.warn("Exception while stopping the YarnClient", e);
            }
        }
        //cleanup the yarn-properties file
        String confDirPath = System.getenv("FLINK_CONF_DIR");
        File configDirectory = new File(confDirPath);
        LOG.info("testDetachedPerJobYarnClusterInternal: Using configuration directory " + configDirectory.getAbsolutePath());
        // load the configuration
        LOG.info("testDetachedPerJobYarnClusterInternal: Trying to load configuration file");
        GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
        try {
            File yarnPropertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(GlobalConfiguration.loadConfiguration());
            if (yarnPropertiesFile.exists()) {
                LOG.info("testDetachedPerJobYarnClusterInternal: Cleaning up temporary Yarn address reference: {}", yarnPropertiesFile.getAbsolutePath());
                yarnPropertiesFile.delete();
            }
        } catch (Exception e) {
            LOG.warn("testDetachedPerJobYarnClusterInternal: Exception while deleting the JobManager address file", e);
        }
    }
    LOG.info("Finished testDetachedMode()");
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) UtilsTest.checkForLogString(org.apache.flink.yarn.UtilsTest.checkForLogString) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) Test(org.junit.Test)

Example 63 with YarnClient

Use of org.apache.hadoop.yarn.client.api.YarnClient in the Apache Flink project.

From the class YARNSessionCapacitySchedulerITCase, method testTaskManagerFailure.

/**
 * Test TaskManager failure and also if the vcores are set correctly (see issue FLINK-2213).
 *
 * <p>The test runs in three phases:
 * <ol>
 *   <li>start a YARN session and check the JobManager web interface (TaskManager list,
 *       configuration, logs),</li>
 *   <li>locate the container running the TaskManager and stop it through its NodeManager,</li>
 *   <li>wait until the captured stderr shows a killed container followed by a newly launched
 *       TaskManager, then stop the CLI and check the captured output.</li>
 * </ol>
 */
// timeout after 100 seconds
@Test(timeout = 100000)
public void testTaskManagerFailure() {
    LOG.info("Starting testTaskManagerFailure()");
    Runner runner = startWithArgs(new String[] {
        "-j", flinkUberjar.getAbsolutePath(),
        "-t", flinkLibFolder.getAbsolutePath(),
        "-n", "1",
        "-jm", "768",
        "-tm", "1024",
        // set the slots 3 to check if the vCores are set properly!
        "-s", "3",
        "-nm", "customName",
        "-Dfancy-configuration-value=veryFancy",
        "-Dyarn.maximum-failed-containers=3",
        "-D" + ConfigConstants.YARN_VCORES + "=2" }, "Number of connected TaskManagers changed to 1. Slots available: 3", RunTypes.YARN_SESSION);
    Assert.assertEquals(2, getRunningContainers());
    // ------------------------ Test if JobManager web interface is accessible -------
    YarnClient yc = null;
    try {
        yc = YarnClient.createYarnClient();
        yc.init(yarnConfiguration);
        yc.start();
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        // Only one running
        Assert.assertEquals(1, apps.size());
        ApplicationReport app = apps.get(0);
        Assert.assertEquals("customName", app.getName());
        // normalize the tracking URL: trailing slash and explicit http scheme
        String url = app.getTrackingUrl();
        if (!url.endsWith("/")) {
            url += "/";
        }
        if (!url.startsWith("http://")) {
            url = "http://" + url;
        }
        LOG.info("Got application URL from YARN {}", url);
        String response = TestBaseUtils.getFromHTTP(url + "taskmanagers/");
        JsonNode parsedTMs = new ObjectMapper().readTree(response);
        ArrayNode taskManagers = (ArrayNode) parsedTMs.get("taskmanagers");
        Assert.assertNotNull(taskManagers);
        Assert.assertEquals(1, taskManagers.size());
        Assert.assertEquals(3, taskManagers.get(0).get("slotsNumber").asInt());
        // get the configuration from webinterface & check if the dynamic properties from YARN show up there.
        String jsonConfig = TestBaseUtils.getFromHTTP(url + "jobmanager/config");
        Map<String, String> parsedConfig = WebMonitorUtils.fromKeyValueJsonArray(jsonConfig);
        Assert.assertEquals("veryFancy", parsedConfig.get("fancy-configuration-value"));
        Assert.assertEquals("3", parsedConfig.get("yarn.maximum-failed-containers"));
        Assert.assertEquals("2", parsedConfig.get(ConfigConstants.YARN_VCORES));
        // -------------- FLINK-1902: check if jobmanager hostname/port are shown in web interface
        // first, get the hostname/port
        String oC = outContent.toString();
        Pattern p = Pattern.compile("Flink JobManager is now running on ([a-zA-Z0-9.-]+):([0-9]+)");
        Matcher matches = p.matcher(oC);
        String hostname = null;
        String port = null;
        // iterate over all matches so that the LAST reported address wins
        while (matches.find()) {
            hostname = matches.group(1).toLowerCase();
            port = matches.group(2);
        }
        LOG.info("Extracted hostname:port: {} {}", hostname, port);
        Assert.assertEquals("unable to find hostname in " + jsonConfig, hostname, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY));
        Assert.assertEquals("unable to find port in " + jsonConfig, port, parsedConfig.get(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY));
        // test logfile access
        String logs = TestBaseUtils.getFromHTTP(url + "jobmanager/log");
        Assert.assertTrue(logs.contains("Starting YARN ApplicationMaster"));
        Assert.assertTrue(logs.contains("Starting JobManager"));
        Assert.assertTrue(logs.contains("Starting JobManager Web Frontend"));
    } catch (Throwable e) {
        LOG.warn("Error while running test", e);
        Assert.fail(e.getMessage());
    } finally {
        // the client is not needed past this point; stopping it here guarantees that it is
        // released even when one of the checks above (or a later assertion) fails
        if (yc != null) {
            yc.stop();
        }
    }
    // ------------------------ Kill container with TaskManager and check if vcores are set correctly -------
    // find container id of taskManager:
    ContainerId taskManagerContainer = null;
    NodeManager nodeManager = null;
    UserGroupInformation remoteUgi = null;
    NMTokenIdentifier nmIdent = null;
    try {
        remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
        LOG.warn("Unable to get curr user", e);
        Assert.fail();
    }
    for (int nmId = 0; nmId < NUM_NODEMANAGERS; nmId++) {
        NodeManager nm = yarnCluster.getNodeManager(nmId);
        ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers();
        for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) {
            // a container is identified as the TaskManager by its launch command line
            String command = Joiner.on(" ").join(entry.getValue().getLaunchContext().getCommands());
            if (command.contains(YarnTaskManager.class.getSimpleName())) {
                taskManagerContainer = entry.getKey();
                nodeManager = nm;
                nmIdent = new NMTokenIdentifier(taskManagerContainer.getApplicationAttemptId(), null, "", 0);
                // allow myself to do stuff with the container
                // remoteUgi.addCredentials(entry.getValue().getCredentials());
                remoteUgi.addTokenIdentifier(nmIdent);
            }
        }
        sleep(500);
    }
    Assert.assertNotNull("Unable to find container with TaskManager", taskManagerContainer);
    Assert.assertNotNull("Illegal state", nodeManager);
    List<ContainerId> toStop = new LinkedList<ContainerId>();
    toStop.add(taskManagerContainer);
    StopContainersRequest scr = StopContainersRequest.newInstance(toStop);
    try {
        nodeManager.getNMContext().getContainerManager().stopContainers(scr);
    } catch (Throwable e) {
        LOG.warn("Error stopping container", e);
        Assert.fail("Error stopping container: " + e.getMessage());
    }
    // stateful termination check:
    // wait until we saw a container being killed and AFTERWARDS a new one launched
    boolean ok = false;
    do {
        LOG.debug("Waiting for correct order of events. Output: {}", errContent.toString());
        String o = errContent.toString();
        int killedOff = o.indexOf("Container killed by the ApplicationMaster");
        if (killedOff != -1) {
            // only look at the output that follows the "killed" message
            o = o.substring(killedOff);
            ok = o.indexOf("Launching TaskManager") > 0;
        }
        sleep(1000);
    } while (!ok);
    // send "stop" command to command line interface
    runner.sendStop();
    // wait for the thread to stop
    try {
        runner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Interrupted while stopping runner", e);
        // restore the interrupt status so that it is not silently lost
        Thread.currentThread().interrupt();
    }
    LOG.warn("stopped");
    // ----------- Send output to logger
    System.setOut(originalStdout);
    System.setErr(originalStderr);
    String oC = outContent.toString();
    String eC = errContent.toString();
    LOG.info("Sending stdout content through logger: \n\n{}\n\n", oC);
    LOG.info("Sending stderr content through logger: \n\n{}\n\n", eC);
    // ------ Check if everything happened correctly
    Assert.assertTrue("Expect to see failed container", eC.contains("New messages from the YARN cluster"));
    Assert.assertTrue("Expect to see failed container", eC.contains("Container killed by the ApplicationMaster"));
    Assert.assertTrue("Expect to see new container started", eC.contains("Launching TaskManager") && eC.contains("on host"));
    // cleanup auth for the subsequent tests.
    // NOTE(review): UserGroupInformation#getTokenIdentifiers() may return a copy of the
    // credential set rather than a live view; if so, this remove() does not actually
    // unregister nmIdent - confirm against the Hadoop version in use.
    remoteUgi.getTokenIdentifiers().remove(nmIdent);
    LOG.info("Finished testTaskManagerFailure()");
}
Also used : NMTokenIdentifier(org.apache.hadoop.yarn.security.NMTokenIdentifier) Matcher(java.util.regex.Matcher) JsonNode(com.fasterxml.jackson.databind.JsonNode) UtilsTest.checkForLogString(org.apache.flink.yarn.UtilsTest.checkForLogString) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) Pattern(java.util.regex.Pattern) IOException(java.io.IOException) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) NodeManager(org.apache.hadoop.yarn.server.nodemanager.NodeManager) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Example 64 with YarnClient

Use of org.apache.hadoop.yarn.client.api.YarnClient in the Apache Hadoop project.

From the class TestLogsCLI, method testFetchRunningApplicationLogs.

/**
 * Checks log fetching for a RUNNING application.
 *
 * <p>First, with a mock {@link YarnClient} that reports one application attempt and two
 * containers, running the CLI with only {@code -applicationId} must request logs once per
 * container, each time with the single log type "ALL". Second, with a client created by
 * {@code createMockYarnClientWithException} (presumably one whose report lookups fail -
 * verify against that helper), passing {@code -containerId} and {@code -nodeAddress}
 * explicitly must still succeed and request logs exactly once.
 *
 * @throws Exception if the CLI invocation or the mock setup fails unexpectedly
 */
@Test(timeout = 5000)
public void testFetchRunningApplicationLogs() throws Exception {
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    NodeId nodeId = NodeId.newInstance("localhost", 1234);
    ApplicationId appId = ApplicationId.newInstance(0, 1);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
    // Create a mock ApplicationAttempt Report
    ApplicationAttemptReport mockAttemptReport = mock(ApplicationAttemptReport.class);
    doReturn(appAttemptId).when(mockAttemptReport).getApplicationAttemptId();
    List<ApplicationAttemptReport> attemptReports = Arrays.asList(mockAttemptReport);
    // Create two mock containerReports
    ContainerId containerId1 = ContainerId.newContainerId(appAttemptId, 1);
    ContainerReport mockContainerReport1 = mock(ContainerReport.class);
    doReturn(containerId1).when(mockContainerReport1).getContainerId();
    doReturn(nodeId).when(mockContainerReport1).getAssignedNode();
    doReturn("http://localhost:2345").when(mockContainerReport1).getNodeHttpAddress();
    ContainerId containerId2 = ContainerId.newContainerId(appAttemptId, 2);
    ContainerReport mockContainerReport2 = mock(ContainerReport.class);
    doReturn(containerId2).when(mockContainerReport2).getContainerId();
    doReturn(nodeId).when(mockContainerReport2).getAssignedNode();
    doReturn("http://localhost:2345").when(mockContainerReport2).getNodeHttpAddress();
    List<ContainerReport> containerReports = Arrays.asList(mockContainerReport1, mockContainerReport2);
    // Mock the YarnClient, and it would report the previous created
    // mockAttemptReport and previous two created mockContainerReports
    YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.RUNNING, ugi.getShortUserName(), true, attemptReports, containerReports);
    LogsCLI cli = spy(new LogsCLIForTest(mockYarnClient));
    // stub out the actual log printing; only the issued requests are of interest here
    doReturn(0).when(cli).printContainerLogsFromRunningApplication(any(Configuration.class), any(ContainerLogsRequest.class), any(LogCLIHelpers.class), anyBoolean());
    cli.setConf(new YarnConfiguration());
    int exitCode = cli.run(new String[] { "-applicationId", appId.toString() });
    // assertEquals reports the actual exit code on failure, unlike assertTrue(exitCode == 0)
    Assert.assertEquals(0, exitCode);
    ArgumentCaptor<ContainerLogsRequest> logsRequestCaptor = ArgumentCaptor.forClass(ContainerLogsRequest.class);
    // we have two container reports, so make sure we have called
    // printContainerLogsFromRunningApplication twice
    verify(cli, times(2)).printContainerLogsFromRunningApplication(any(Configuration.class), logsRequestCaptor.capture(), any(LogCLIHelpers.class), anyBoolean());
    // Verify that the log-type is "ALL"
    List<ContainerLogsRequest> capturedRequests = logsRequestCaptor.getAllValues();
    Assert.assertEquals(2, capturedRequests.size());
    Set<String> logTypes0 = capturedRequests.get(0).getLogTypes();
    Set<String> logTypes1 = capturedRequests.get(1).getLogTypes();
    // size and content are asserted separately so a failure pinpoints which one is wrong
    Assert.assertEquals(1, logTypes0.size());
    Assert.assertTrue(logTypes0.contains("ALL"));
    Assert.assertEquals(1, logTypes1.size());
    Assert.assertTrue(logTypes1.contains("ALL"));
    mockYarnClient = createMockYarnClientWithException(YarnApplicationState.RUNNING, ugi.getShortUserName());
    LogsCLI cli2 = spy(new LogsCLIForTest(mockYarnClient));
    doReturn(0).when(cli2).printContainerLogsFromRunningApplication(any(Configuration.class), any(ContainerLogsRequest.class), any(LogCLIHelpers.class), anyBoolean());
    doReturn("123").when(cli2).getNodeHttpAddressFromRMWebString(any(ContainerLogsRequest.class));
    cli2.setConf(new YarnConfiguration());
    ContainerId containerId100 = ContainerId.newContainerId(appAttemptId, 100);
    exitCode = cli2.run(new String[] { "-applicationId", appId.toString(), "-containerId", containerId100.toString(), "-nodeAddress", "NM:1234" });
    Assert.assertEquals(0, exitCode);
    verify(cli2, times(1)).printContainerLogsFromRunningApplication(any(Configuration.class), logsRequestCaptor.capture(), any(LogCLIHelpers.class), anyBoolean());
}
Also used : ApplicationAttemptReport(org.apache.hadoop.yarn.api.records.ApplicationAttemptReport) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerLogsRequest(org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Matchers.anyString(org.mockito.Matchers.anyString) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ContainerReport(org.apache.hadoop.yarn.api.records.ContainerReport) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NodeId(org.apache.hadoop.yarn.api.records.NodeId) LogCLIHelpers(org.apache.hadoop.yarn.logaggregation.LogCLIHelpers) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 65 with YarnClient

Use of org.apache.hadoop.yarn.client.api.YarnClient in the Apache Hadoop project.

From the class TestLogsCLI, method testFetchApplictionLogsHar.

/**
 * Checks that the logs CLI can print the logs of a FINISHED application whose aggregated
 * logs are stored as a HAR archive.
 *
 * <p>The archive {@code application_1440536969523_0001.har} is copied from the classpath
 * into the remote log directory under {@code target/logs/}; the CLI output is then expected
 * to contain the stderr/stdout/syslog content of both containers. The captured output and
 * the log directory are cleaned up in a {@code finally} block so that a failing assertion
 * does not leave state behind.
 *
 * @throws Exception if file system access or the CLI invocation fails unexpectedly
 */
@Test(timeout = 15000)
public void testFetchApplictionLogsHar() throws Exception {
    String remoteLogRootDir = "target/logs/";
    Configuration configuration = new Configuration();
    configuration.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
    configuration.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogRootDir);
    configuration.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
    configuration.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
    FileSystem fs = FileSystem.get(configuration);
    try {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        URL harUrl = ClassLoader.getSystemClassLoader().getResource("application_1440536969523_0001.har");
        assertNotNull(harUrl);
        Path path = new Path(remoteLogRootDir + ugi.getShortUserName() + "/logs/application_1440536969523_0001");
        // start from a clean application log directory
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        assertTrue(fs.mkdirs(path));
        Path harPath = new Path(path, "application_1440536969523_0001.har");
        fs.copyFromLocalFile(false, new Path(harUrl.toURI()), harPath);
        assertTrue(fs.exists(harPath));
        YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED, ugi.getShortUserName());
        LogsCLI cli = new LogsCLIForTest(mockYarnClient);
        cli.setConf(configuration);
        int exitCode = cli.run(new String[] { "-applicationId", "application_1440536969523_0001" });
        // include the actual exit code in the failure message
        assertTrue("unexpected exit code: " + exitCode, exitCode == 0);
        String out = sysOutStream.toString();
        assertTrue(out.contains("container_1440536969523_0001_01_000001 on host1_1111"));
        assertTrue(out.contains("Hello stderr"));
        assertTrue(out.contains("Hello stdout"));
        assertTrue(out.contains("Hello syslog"));
        assertTrue(out.contains("container_1440536969523_0001_01_000002 on host2_2222"));
        assertTrue(out.contains("Goodbye stderr"));
        assertTrue(out.contains("Goodbye stdout"));
        assertTrue(out.contains("Goodbye syslog"));
    } finally {
        // cleanup runs on success and on failure alike
        sysOutStream.reset();
        fs.delete(new Path(remoteLogRootDir), true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Matchers.anyString(org.mockito.Matchers.anyString) URL(java.net.URL) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Aggregations

YarnClient (org.apache.hadoop.yarn.client.api.YarnClient)89 Test (org.junit.Test)51 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)50 Configuration (org.apache.hadoop.conf.Configuration)45 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)37 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)21 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)18 IOException (java.io.IOException)17 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)15 MiniYARNCluster (org.apache.hadoop.yarn.server.MiniYARNCluster)15 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)14 Path (org.apache.hadoop.fs.Path)13 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)13 FileSystem (org.apache.hadoop.fs.FileSystem)11 Matchers.anyString (org.mockito.Matchers.anyString)11 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)9 ArrayList (java.util.ArrayList)8 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)8 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)7