Search in sources:

Example 1 with YarnClient

use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.

the class AbstractYarnClusterDescriptor method deployInternal.

/**
 * Deploys the ApplicationMaster/JobManager on YARN. According to the original
 * contract of this method it blocks until they have been deployed.
 *
 * <p>The method performs the following steps in order:
 * <ol>
 *   <li>logs the requested sizes and, on a best-effort basis, warns if the
 *       configured queue is not among the queues reported by YARN;</li>
 *   <li>copies the dynamic properties into {@code flinkConfiguration};</li>
 *   <li>raises the JobManager/TaskManager memory to
 *       {@code yarn.scheduler.minimum-allocation-mb} if it is below that value;</li>
 *   <li>creates the YARN application and fails the deployment if a single
 *       container request exceeds the maximum resource capability;</li>
 *   <li>logs warnings (but continues) if the cluster currently lacks free
 *       memory for the full session;</li>
 *   <li>starts the application master and writes its host and RPC port into
 *       {@code flinkConfiguration}.</li>
 * </ol>
 *
 * @return the cluster client produced by {@code createYarnClusterClient}
 * @throws YarnDeploymentException if the JobManager or TaskManager memory is
 *         larger than the maximum resource capability reported by YARN
 * @throws Exception if any of the underlying YARN or deployment calls fails
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();
    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);
    final YarnClient yarnClient = getYarnClient();
    // Queue validation is deliberately best-effort: any failure is only logged
    // and the deployment carries on.
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        if (queues.size() > 0 && this.yarnQueue != null) {
            // check only if there are queues configured in yarn and for this session.
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                // Join the names with ", " without leaving a dangling separator.
                StringBuilder queueNames = new StringBuilder();
                String separator = "";
                for (QueueInfo queue : queues) {
                    queueNames.append(separator).append(queue.getQueueName());
                    separator = ", ";
                }
                LOG.warn("The specified queue '{}' does not exist. Available queues: {}", this.yarnQueue, queueNames);
            }
        } else {
            // NOTE(review): this branch is also taken when queues exist but no
            // queue was requested for this session (yarnQueue == null).
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: {}", e.getMessage());
        LOG.debug("Error details", e);
    }
    // ------------------ Add dynamic properties to local flinkConfiguration ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }
    // ------------------ Check if the YARN ClusterClient has the requested resources --------------
    // the yarnMinAllocationMB specifies the smallest possible container allocation size.
    // all allocations below this value are automatically set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. " + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB + "'. Please increase the memory size. " + "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances " + "you requested will start.");
    }
    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }
    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();
    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the 'yarn.nodemanager.resource.memory-mb' configuration values\n";
    // A single container can never exceed the maximum capability, so these two
    // conditions are hard failures rather than warnings.
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the JobManager available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb + "MB. " + NOTE);
    }
    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The cluster does not have the requested resources for the TaskManagers available!\n" + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + taskManagerMemoryMb + "MB. " + NOTE);
    }
    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, but maybe not all TaskManagers are " + "connecting from the beginning because the resources are currently not available in the cluster. " + "The allocation might take more time than usual because the Flink YARN client needs to wait until " + "the resources become available.";
    // Widen to long before multiplying so that a large TaskManager count cannot
    // overflow and hide the "not enough memory" warning.
    long totalMemoryRequired = jobManagerMemoryMb + (long) taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. " + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);
    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the JobManager (" + jobManagerMemoryMb + "MB) is more than " + "the largest possible YARN container: " + freeClusterMem.containerLimit + NOTE_RSC);
    }
    // ----------------- check if the requested containers fit into the cluster.
    // Work on a copy so that the original per-NodeManager figures stay available
    // for the log messages below.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. " + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. " + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. " + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n" + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/" + taskManagerCount + ") TaskManagers, " + "the following NodeManagers are available: " + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }
    ApplicationReport report = startAppMaster(null, yarnClient, yarnApplication);
    String host = report.getHost();
    int port = report.getRpcPort();
    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);
    // the Flink cluster is deployed in YARN. Represent cluster
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}
Also used : QueueInfo(org.apache.hadoop.yarn.api.records.QueueInfo) GetNewApplicationResponse(org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with YarnClient

use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.

the class AbstractYarnClusterDescriptor method getClusterDescription.

/**
 * Builds a plain-text description of the YARN cluster.
 *
 * <p>The text contains the number of NodeManagers, one table section per
 * running node (id, memory, vCores, health report, container count), a
 * summary line with the total memory and cores, and one line per queue.
 *
 * <p>The YARN client obtained from {@code getYarnClient()} is stopped before
 * this method returns, whether or not the queries succeed.
 *
 * @return the formatted cluster description
 * @throws RuntimeException wrapping any exception raised while querying YARN
 */
@Override
public String getClusterDescription() {
    try {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final PrintStream ps = new PrintStream(baos);
        final YarnClient yarnClient = getYarnClient();
        try {
            YarnClusterMetrics metrics = yarnClient.getYarnClusterMetrics();
            // println (not append) so the table header starts on its own line
            ps.println("NodeManagers in the ClusterClient " + metrics.getNumNodeManagers());
            List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
            final String format = "|%-16s |%-16s %n";
            ps.printf("|Property         |Value          %n");
            ps.println("+---------------------------------------+");
            int totalMemory = 0;
            int totalCores = 0;
            for (NodeReport rep : nodes) {
                final Resource res = rep.getCapability();
                totalMemory += res.getMemory();
                totalCores += res.getVirtualCores();
                ps.format(format, "NodeID", rep.getNodeId());
                ps.format(format, "Memory", res.getMemory() + " MB");
                ps.format(format, "vCores", res.getVirtualCores());
                ps.format(format, "HealthReport", rep.getHealthReport());
                ps.format(format, "Containers", rep.getNumContainers());
                ps.println("+---------------------------------------+");
            }
            ps.println("Summary: totalMemory " + totalMemory + " totalCores " + totalCores);
            List<QueueInfo> qInfo = yarnClient.getAllQueues();
            for (QueueInfo q : qInfo) {
                ps.println("Queue: " + q.getQueueName() + ", Current Capacity: " + q.getCurrentCapacity() + " Max Capacity: " + q.getMaximumCapacity() + " Applications: " + q.getApplications().size());
            }
        } finally {
            // Release the client even when one of the YARN queries above throws;
            // previously it was only stopped on the success path.
            yarnClient.stop();
        }
        ps.flush();
        return baos.toString();
    } catch (Exception e) {
        throw new RuntimeException("Couldn't get cluster description", e);
    }
}
Also used : QueueInfo(org.apache.hadoop.yarn.api.records.QueueInfo) PrintStream(java.io.PrintStream) YarnClusterMetrics(org.apache.hadoop.yarn.api.records.YarnClusterMetrics) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ByteArrayOutputStream(java.io.ByteArrayOutputStream) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) URISyntaxException(java.net.URISyntaxException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) NodeReport(org.apache.hadoop.yarn.api.records.NodeReport)

Example 3 with YarnClient

use of org.apache.hadoop.yarn.client.api.YarnClient in project flink by apache.

the class YARNSessionCapacitySchedulerITCase method testDetachedPerJobYarnClusterInternal.

/**
 * Submits {@code job} through the CLI frontend in detached per-job YARN mode
 * and verifies the outcome.
 *
 * <p>A dedicated {@link YarnClient} is created for the duration of the test
 * and is always stopped afterwards, including when an assertion fails.
 *
 * @param job the value placed after {@code --yarndetached} on the command line
 *            (presumably the path of the job jar; confirm against callers)
 */
private void testDetachedPerJobYarnClusterInternal(String job) {
    final YarnClient yc = YarnClient.createYarnClient();
    try {
        yc.init(yarnConfiguration);
        yc.start();
        runAndVerifyDetachedPerJobCluster(job, yc);
    } finally {
        // the client was previously never stopped
        yc.stop();
    }
}

/**
 * Runs the detached job, waits for the application to finish, checks the job
 * output and the JobManager log, and finally removes the yarn-properties file.
 *
 * @param job the value placed after {@code --yarndetached} on the command line
 * @param yc  a started YARN client used to look up application state
 */
private void runAndVerifyDetachedPerJobCluster(String job, YarnClient yc) {
    // get temporary folder for writing output of wordcount example
    final File tmpOutFolder;
    try {
        tmpOutFolder = tmp.newFolder();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // get temporary file for reading input data for wordcount example
    final File tmpInFile;
    try {
        tmpInFile = tmp.newFile();
        FileUtils.writeStringToFile(tmpInFile, WordCountData.TEXT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Runner runner = startWithArgs(new String[] {
        "run", "-m", "yarn-cluster",
        "-yj", flinkUberjar.getAbsolutePath(),
        "-yt", flinkLibFolder.getAbsolutePath(),
        "-yn", "1",
        "-yjm", "768",
        // test if the cutoff is passed correctly
        "-yD", "yarn.heap-cutoff-ratio=0.5",
        "-yD", "yarn.tags=test-tag",
        "-ytm", "1024",
        // test requesting slots from YARN.
        "-ys", "2",
        "--yarndetached", job,
        "--input", tmpInFile.getAbsoluteFile().toString(),
        "--output", tmpOutFolder.getAbsoluteFile().toString() }, "Job has been submitted with JobID", RunTypes.CLI_FRONTEND);
    // it should usually be 2, but on slow machines, the number varies
    Assert.assertTrue("There should be at most 2 containers running", getRunningContainers() <= 2);
    // give the runner some time to detach
    for (int attempt = 0; runner.isAlive() && attempt < 5; attempt++) {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller and stop waiting; further
            // sleeps would fail immediately anyway.
            Thread.currentThread().interrupt();
            break;
        }
    }
    Assert.assertFalse("The runner should detach.", runner.isAlive());
    LOG.info("CLI Frontend has returned, so the job is running");
    // find out the application id and wait until it has finished.
    try {
        List<ApplicationReport> apps = yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING));
        ApplicationId tmpAppId;
        if (apps.size() == 1) {
            // Better method to find the right appId. But sometimes the app is shutting down very fast
            // Only one running
            tmpAppId = apps.get(0).getApplicationId();
            LOG.info("waiting for the job with appId {} to finish", tmpAppId);
            // wait until the app has finished
            while (yc.getApplications(EnumSet.of(YarnApplicationState.RUNNING)).size() > 0) {
                sleep(500);
            }
        } else {
            // get appId by finding the latest finished appid
            apps = yc.getApplications();
            Collections.sort(apps, new Comparator<ApplicationReport>() {

                @Override
                public int compare(ApplicationReport o1, ApplicationReport o2) {
                    // Descending order. Swapping the operands avoids negating the
                    // result, which is wrong for Integer.MIN_VALUE.
                    return o2.getApplicationId().compareTo(o1.getApplicationId());
                }
            });
            tmpAppId = apps.get(0).getApplicationId();
            LOG.info("Selected {} as the last appId from {}", tmpAppId, Arrays.toString(apps.toArray()));
        }
        final ApplicationId id = tmpAppId;
        // now it has finished.
        // check the output files.
        File[] listOfOutputFiles = tmpOutFolder.listFiles();
        Assert.assertNotNull("Taskmanager output not found", listOfOutputFiles);
        LOG.info("The job has finished. TaskManager output files found in {}", tmpOutFolder);
        // read all output files in output folder to one output string
        StringBuilder outputBuilder = new StringBuilder();
        for (File f : listOfOutputFiles) {
            if (f.isFile()) {
                outputBuilder.append(FileUtils.readFileToString(f)).append("\n");
            }
        }
        String content = outputBuilder.toString();
        // check for some of the wordcount outputs.
        Assert.assertTrue("Expected string 'da 5' or '(all,2)' not found in string '" + content + "'", content.contains("da 5") || content.contains("(da,5)") || content.contains("(all,2)"));
        Assert.assertTrue("Expected string 'der 29' or '(mind,1)' not found in string'" + content + "'", content.contains("der 29") || content.contains("(der,29)") || content.contains("(mind,1)"));
        // check if the heap size for the TaskManager was set correctly
        File jobmanagerLog = YarnTestBase.findFile("..", new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                return name.contains("jobmanager.log") && dir.getAbsolutePath().contains(id.toString());
            }
        });
        Assert.assertNotNull("Unable to locate JobManager log", jobmanagerLog);
        content = FileUtils.readFileToString(jobmanagerLog);
        // TM was started with 1024 but we cut off 50% (NOT THE DEFAULT VALUE)
        String expected = "Starting TaskManagers with command: $JAVA_HOME/bin/java -Xms424m -Xmx424m";
        Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log: '" + jobmanagerLog + "'", content.contains(expected));
        expected = " (2/2) (attempt #0) to ";
        Assert.assertTrue("Expected string '" + expected + "' not found in JobManager log." + "This string checks that the job has been started with a parallelism of 2. Log contents: '" + jobmanagerLog + "'", content.contains(expected));
        // make sure the detached app is really finished.
        LOG.info("Checking again that app has finished");
        ApplicationReport rep;
        do {
            sleep(500);
            rep = yc.getApplicationReport(id);
            LOG.info("Got report {}", rep);
        } while (rep.getYarnApplicationState() == YarnApplicationState.RUNNING);
        verifyApplicationTags(rep);
    } catch (Throwable t) {
        LOG.warn("Error while detached yarn session was running", t);
        Assert.fail(t.getMessage());
    } finally {
        //cleanup the yarn-properties file
        String confDirPath = System.getenv("FLINK_CONF_DIR");
        File configDirectory = new File(confDirPath);
        LOG.info("testDetachedPerJobYarnClusterInternal: Using configuration directory " + configDirectory.getAbsolutePath());
        // load the configuration
        LOG.info("testDetachedPerJobYarnClusterInternal: Trying to load configuration file");
        GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
        try {
            File yarnPropertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(GlobalConfiguration.loadConfiguration());
            if (yarnPropertiesFile.exists()) {
                LOG.info("testDetachedPerJobYarnClusterInternal: Cleaning up temporary Yarn address reference: {}", yarnPropertiesFile.getAbsolutePath());
                yarnPropertiesFile.delete();
            }
        } catch (Exception e) {
            LOG.warn("testDetachedPerJobYarnClusterInternal: Exception while deleting the JobManager address file", e);
        }
    }
}
Also used : IOException(java.io.IOException) UtilsTest.checkForLogString(org.apache.flink.yarn.UtilsTest.checkForLogString) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) FilenameFilter(java.io.FilenameFilter) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File)

Example 4 with YarnClient

use of org.apache.hadoop.yarn.client.api.YarnClient in project hadoop by apache.

the class HadoopArchiveLogs method filterAppsByAggregatedStatus.

/**
 * Removes from {@code eligibleApplications} every application whose log
 * aggregation status, as reported by the ResourceManager, is one of
 * RUNNING, RUNNING_WITH_FAILURE, NOT_START, DISABLED or FAILED.
 *
 * <p>Applications with any other status are kept and have their finish time
 * set from the application report. Applications unknown to the
 * ResourceManager are kept unchanged.
 *
 * @throws IOException   if a call to the ResourceManager fails
 * @throws YarnException if a call to the ResourceManager fails
 */
@VisibleForTesting
void filterAppsByAggregatedStatus() throws IOException, YarnException {
    YarnClient client = YarnClient.createYarnClient();
    try {
        client.init(getConf());
        client.start();
        Iterator<AppInfo> candidates = eligibleApplications.iterator();
        while (candidates.hasNext()) {
            AppInfo candidate = candidates.next();
            try {
                ApplicationId appId = ApplicationId.fromString(candidate.getAppId());
                ApplicationReport appReport = client.getApplicationReport(appId);
                LogAggregationStatus status = appReport.getLogAggregationStatus();
                // statuses for which the application is dropped from the list
                boolean excluded = status.equals(LogAggregationStatus.RUNNING)
                    || status.equals(LogAggregationStatus.RUNNING_WITH_FAILURE)
                    || status.equals(LogAggregationStatus.NOT_START)
                    || status.equals(LogAggregationStatus.DISABLED)
                    || status.equals(LogAggregationStatus.FAILED);
                if (excluded) {
                    if (verbose) {
                        LOG.info("Skipping " + candidate.getAppId() + " due to aggregation status being " + status);
                    }
                    candidates.remove();
                    continue;
                }
                if (verbose) {
                    LOG.info(candidate.getAppId() + " has aggregation status " + status);
                }
                candidate.setFinishTime(appReport.getFinishTime());
            } catch (ApplicationNotFoundException e) {
                // Assume the aggregation has finished
                if (verbose) {
                    LOG.info(candidate.getAppId() + " not in the ResourceManager");
                }
            }
        }
    } finally {
        if (client != null) {
            client.stop();
        }
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with YarnClient

use of org.apache.hadoop.yarn.client.api.YarnClient in project hadoop by apache.

the class TestYarnClient method setupYarnClient.

/**
 * Creates a {@link YarnClient}, initializes it with the configuration of the
 * given mini cluster and starts it.
 *
 * <p>The returned client is not stopped by this method.
 *
 * @param cluster the mini YARN cluster whose configuration is used
 * @return the initialized and started client
 */
private YarnClient setupYarnClient(MiniYARNCluster cluster) {
    // read the configuration first, then create the client
    final Configuration clusterConf = cluster.getConfig();
    final YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(clusterConf);
    yarnClient.start();
    return yarnClient;
}
Also used : CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient)

Aggregations

YarnClient (org.apache.hadoop.yarn.client.api.YarnClient)88 Test (org.junit.Test)51 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)49 Configuration (org.apache.hadoop.conf.Configuration)44 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)37 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)20 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)18 IOException (java.io.IOException)17 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)15 MiniYARNCluster (org.apache.hadoop.yarn.server.MiniYARNCluster)15 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)14 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)13 Path (org.apache.hadoop.fs.Path)12 Matchers.anyString (org.mockito.Matchers.anyString)11 FileSystem (org.apache.hadoop.fs.FileSystem)10 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)9 ArrayList (java.util.ArrayList)8 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)8 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)7