Example 76 with ApplicationSubmissionContext

use of org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext in project asterixdb by apache.

the class AsterixYARNClient method makeApplicationContext.

/**
     * Check for a stale or conflicting instance lock, then create and name a
     * new application context with the YARN ResourceManager.
     *
     * @return a new YarnClientApplication holding the submission context
     * @throws IOException
     * @throws YarnException
     */
public YarnClientApplication makeApplicationContext() throws IOException, YarnException {
    //first check to see if an instance already exists.
    FileSystem fs = FileSystem.get(conf);
    Path lock = new Path(fs.getHomeDirectory(), CONF_DIR_REL + instanceFolder + instanceLock);
    LOG.info("Running Deployment");
    yarnClient.start();
    if (fs.exists(lock)) {
        ApplicationId lockAppId = getLockFile();
        try {
            ApplicationReport previousAppReport = yarnClient.getApplicationReport(lockAppId);
            YarnApplicationState prevStatus = previousAppReport.getYarnApplicationState();
            if (!(prevStatus == YarnApplicationState.FAILED || prevStatus == YarnApplicationState.KILLED || prevStatus == YarnApplicationState.FINISHED) && mode != Mode.DESTROY && mode != Mode.BACKUP && mode != Mode.RESTORE) {
                throw new IllegalStateException("Instance is already running in: " + lockAppId);
            } else if (mode != Mode.DESTROY && mode != Mode.BACKUP && mode != Mode.RESTORE) {
                //stale lock file
                LOG.warn("Stale lockfile detected. Instance attempt " + lockAppId + " may have exited abnormally");
                deleteLockFile();
            }
        } catch (YarnException e) {
            LOG.warn("Stale lockfile detected, but the RM has no record of this application's last run. This is normal if the cluster was restarted.");
            deleteLockFile();
        }
    }
    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }
    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName(appName);
    return app;
}
Also used : Path(org.apache.hadoop.fs.Path) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) GetNewApplicationResponse(org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) FileSystem(org.apache.hadoop.fs.FileSystem) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnException(org.apache.hadoop.yarn.exceptions.YarnException)
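
Stripped of AsterixDB's instance-lock bookkeeping, the pattern above reduces to: request a new application from the ResourceManager, read the cluster's maximum resource capability, clamp the AM memory ask, and name the submission context. Below is a minimal standalone sketch of that pattern; the class name, method name, and hard-coded vcore count are illustrative, not part of AsterixDB.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;

public class AppContextBootstrap {

    public static YarnClientApplication bootstrap(Configuration conf, String appName, int requestedAmMemoryMb) throws Exception {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();
        // Ask the RM for a new application id plus the cluster's resource ceiling.
        YarnClientApplication app = yarnClient.createApplication();
        GetNewApplicationResponse response = app.getNewApplicationResponse();
        int maxMem = response.getMaximumResourceCapability().getMemory();
        // Clamp the AM memory ask to what the scheduler can actually grant.
        int amMemory = Math.min(requestedAmMemoryMb, maxMem);
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        appContext.setApplicationName(appName);
        appContext.setResource(Resource.newInstance(amMemory, 1));
        return app;
    }
}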

Example 77 with ApplicationSubmissionContext

use of org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext in project apex-core by apache.

the class StramClient method startApplication.

/**
   * Launch application for the dag represented by this client.
   *
   * @throws YarnException
   * @throws IOException
   */
public void startApplication() throws YarnException, IOException {
    Class<?>[] defaultClasses;
    if (applicationType.equals(YARN_APPLICATION_TYPE)) {
        //TODO restrict the security check to only check if security is enabled for webservices.
        if (UserGroupInformation.isSecurityEnabled()) {
            defaultClasses = APEX_SECURITY_CLASSES;
        } else {
            defaultClasses = APEX_CLASSES;
        }
    } else {
        throw new IllegalStateException(applicationType + " is not a valid application type.");
    }
    LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);
    if (resources != null) {
        localJarFiles.addAll(resources);
    }
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM, numNodeManagers={}", clusterMetrics.getNumNodeManagers());
    //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq);
    //LOG.info("Got Cluster node info from ASM");
    //for (NodeReport node : clusterNodesResp.getNodeReports()) {
    //  LOG.info("Got node report from ASM for"
    //           + ", nodeId=" + node.getNodeId()
    //           + ", nodeAddress" + node.getHttpAddress()
    //           + ", nodeRackName" + node.getRackName()
    //           + ", nodeNumContainers" + node.getNumContainers()
    //           + ", nodeHealthStatus" + node.getHealthReport());
    //}
    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue queueName={}, userAcl={}", aclInfo.getQueueName(), userAcl.name());
        }
    }
    // Get a new application id
    YarnClientApplication newApp = yarnClient.createApplication();
    appId = newApp.getNewApplicationResponse().getApplicationId();
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);
    int amMemory = dag.getMasterMemoryMB();
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value, specified={}, max={}", amMemory, maxMem);
        amMemory = maxMem;
    }
    if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
        dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
    }
    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
    appContext.setApplicationType(this.applicationType);
    if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
    //appContext.setMaxAppAttempts(1); // no retries until Stram is HA
    }
    appContext.setKeepContainersAcrossApplicationAttempts(true);
    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    // application master launch.
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }
        // For now, only getting tokens for the default file-system.
        try (FileSystem fs = StramClientUtils.newFileSystemInstance(conf)) {
            final Token<?>[] tokens = fs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + fs.getUri() + "; " + token);
                }
            }
        }
        new ClientRMHelper(yarnClient, conf).addRMDelegationToken(tokenRenewer, credentials);
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }
    // Setup ACLs for the impersonating user
    LOG.debug("ACL login user {} current user {}", UserGroupInformation.getLoginUser(), UserGroupInformation.getCurrentUser());
    if (!UserGroupInformation.getCurrentUser().equals(UserGroupInformation.getLoginUser())) {
        ACLManager.setupUserACLs(amContainer, UserGroupInformation.getLoginUser().getShortUserName(), conf);
    }
    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<>();
    // copy required jar files to dfs, to be localized for containers
    try (FileSystem fs = StramClientUtils.newFileSystemInstance(conf)) {
        Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
        Path appPath;
        String configuredAppPath = dag.getValue(LogicalPlan.APPLICATION_PATH);
        if (configuredAppPath == null) {
            appPath = new Path(appsBasePath, appId.toString());
        } else {
            appPath = new Path(configuredAppPath);
        }
        String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));
        LOG.info("libjars: {}", libJarsCsv);
        dag.getAttributes().put(Context.DAGContext.LIBRARY_JARS, libJarsCsv);
        LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources, fs);
        if (archives != null) {
            String[] localFiles = archives.split(",");
            String archivesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("archives: {}", archivesCsv);
            dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv, localResources, fs);
        }
        if (files != null) {
            String[] localFiles = files.split(",");
            String filesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("files: {}", filesCsv);
            dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources, fs);
        }
        dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
        StorageAgent agent = dag.getAttributes().get(OperatorContext.STORAGE_AGENT);
        if (agent != null && agent instanceof StorageAgent.ApplicationAwareStorageAgent) {
            ((StorageAgent.ApplicationAwareStorageAgent) agent).setApplicationAttributes(dag.getAttributes());
        }
        if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) {
            /* which would be the most likely case */
            Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
            // use conf client side to pickup any proxy settings from dt-site.xml
            dag.setAttribute(OperatorContext.STORAGE_AGENT, new AsyncFSStorageAgent(checkpointPath.toString(), conf));
        }
        if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
            dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
        }
        // Set the log4j properties if needed
        if (!log4jPropFile.isEmpty()) {
            Path log4jSrc = new Path(log4jPropFile);
            Path log4jDst = new Path(appPath, "log4j.props");
            fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
            FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
            LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
            log4jRsrc.setType(LocalResourceType.FILE);
            log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
            log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
            log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
            log4jRsrc.setSize(log4jFileStatus.getLen());
            localResources.put("log4j.properties", log4jRsrc);
        }
        if (originalAppId != null) {
            Path origAppPath = new Path(appsBasePath, this.originalAppId);
            LOG.info("Restart from {}", origAppPath);
            copyInitialState(origAppPath);
        }
        // push logical plan to DFS location
        Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
        FSDataOutputStream outStream = fs.create(cfgDst, true);
        LogicalPlan.write(this.dag, outStream);
        outStream.close();
        Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
        outStream = fs.create(launchConfigDst, true);
        conf.writeXml(outStream);
        outStream.close();
        LaunchContainerRunnable.addFileToLocalResources(LogicalPlan.SER_FILE_NAME, fs.getFileStatus(cfgDst), LocalResourceType.FILE, localResources);
        // Set local resource info into app master container launch context
        amContainer.setLocalResources(localResources);
        // Set the necessary security tokens as needed
        //amContainer.setContainerTokens(containerToken);
        // Set the env variables to be setup in the env where the application master will be run
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<>();
        // Add application jar(s) location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar(s)
        // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
        //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
        StringBuilder classPathEnv = new StringBuilder("./*");
        String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
        for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH : classpath.split(",")) {
            if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
                // SPOI-2501
                continue;
            }
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }
        env.put("CLASSPATH", classPathEnv.toString());
        // propagate to replace node managers user name (effective in non-secure mode)
        // also to indicate original login user during impersonation and important for setting ACLs
        env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());
        amContainer.setEnvironment(env);
        // Set the necessary command to execute the application master
        ArrayList<CharSequence> vargs = new ArrayList<>(30);
        // Set java executable command
        LOG.info("Setting up app master command");
        vargs.add(javaCmd);
        if (dag.isDebug()) {
            vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
        }
        // default heap size 75% of total memory
        if (dag.getMasterJVMOptions() != null) {
            vargs.add(dag.getMasterJVMOptions());
        }
        Path tmpDir = new Path(ApplicationConstants.Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR);
        vargs.add("-Djava.io.tmpdir=" + tmpDir);
        vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
        vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
        vargs.add("-XX:HeapDumpPath=" + System.getProperty("java.io.tmpdir") + "/dt-heap-" + appId.getId() + ".bin");
        vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
        vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
        StramClientUtils.addAttributeToArgs(LogicalPlan.APPLICATION_NAME, dag, vargs);
        StramClientUtils.addAttributeToArgs(LogicalPlan.LOGGER_APPENDER, dag, vargs);
        if (dag.isDebug()) {
            vargs.add("-Dlog4j.debug=true");
        }
        String loggersLevel = conf.get(StramUtils.DT_LOGGERS_LEVEL);
        if (loggersLevel != null) {
            vargs.add(String.format("-D%s=%s", StramUtils.DT_LOGGERS_LEVEL, loggersLevel));
        }
        vargs.add(StreamingAppMaster.class.getName());
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");
        // Get final command
        StringBuilder command = new StringBuilder(9 * vargs.size());
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }
        LOG.info("Completed setting up app master command " + command.toString());
        List<String> commands = new ArrayList<>();
        commands.add(command.toString());
        amContainer.setCommands(commands);
        // Set up resource type requirements
        // For now, only memory is supported so we set memory requirements
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(amMemory);
        appContext.setResource(capability);
        // Service data is a binary blob that can be passed to the application
        // Not needed in this scenario
        // amContainer.setServiceData(serviceData);
        appContext.setAMContainerSpec(amContainer);
        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(amPriority);
        appContext.setPriority(pri);
        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(queueName);
        // set the application tags
        appContext.setApplicationTags(tags);
        // Submit the application to the applications manager
        // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
        // Ignore the response as either a valid response object is returned on success
        // or an exception thrown to denote some form of a failure
        String specStr = Objects.toStringHelper("Submitting application: ").add("name", appContext.getApplicationName()).add("queue", appContext.getQueue()).add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource()).toString();
        LOG.info(specStr);
        if (dag.isDebug()) {
        //LOG.info("Full submission context: " + appContext);
        }
        yarnClient.submitApplication(appContext);
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Token(org.apache.hadoop.security.token.Token) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) ClientRMHelper(com.datatorrent.stram.client.StramClientUtils.ClientRMHelper) FileSystem(org.apache.hadoop.fs.FileSystem) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Path(org.apache.hadoop.fs.Path) YarnClusterMetrics(org.apache.hadoop.yarn.api.records.YarnClusterMetrics) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) Priority(org.apache.hadoop.yarn.api.records.Priority) QueueACL(org.apache.hadoop.yarn.api.records.QueueACL) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) QueueUserACLInfo(org.apache.hadoop.yarn.api.records.QueueUserACLInfo) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) BasicContainerOptConfigurator(com.datatorrent.common.util.BasicContainerOptConfigurator) AsyncFSStorageAgent(com.datatorrent.common.util.AsyncFSStorageAgent) StorageAgent(com.datatorrent.api.StorageAgent) Credentials(org.apache.hadoop.security.Credentials)
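
The security block in the middle of this method is the standard delegation-token packing sequence: collect tokens into a Credentials object, serialize them through a DataOutputBuffer, and hand the resulting bytes to the AM container. A sketch isolating just that step (the class and method names are illustrative):

import java.nio.ByteBuffer;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;

public class TokenPacking {

    // Collects delegation tokens for the given file system and attaches
    // them to the AM container in the serialized form YARN expects.
    public static void attachTokens(FileSystem fs, String renewer, ContainerLaunchContext amContainer) throws Exception {
        Credentials credentials = new Credentials();
        fs.addDelegationTokens(renewer, credentials);
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        amContainer.setTokens(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
    }
}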

Example 78 with ApplicationSubmissionContext

use of org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext in project flink by apache.

the class AbstractYarnClusterDescriptor method startAppMaster.

public ApplicationReport startAppMaster(JobGraph jobGraph, YarnClient yarnClient, YarnClientApplication yarnApplication) throws Exception {
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default " + "filesystem scheme from configuration.", e);
    }
    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);
    // hard-coded check for the GoogleHDFS client because it does not override the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the " + "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values. " + "The Flink YARN client needs to store its files in a distributed file system.");
    }
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }
    //check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }
    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and " + "Logback configuration files. Please delete or rename one of them.");
        }
    }
    addLibFolderToShipFiles(effectiveShipFiles);
    // add the user jar to the classpath of the to-be-created cluster
    if (userJarFiles != null) {
        effectiveShipFiles.addAll(userJarFiles);
    }
    // Set-up ApplicationSubmissionContext for the application
    final ApplicationId appId = appContext.getApplicationId();
    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID, String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }
    flinkConfiguration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);
    if (HighAvailabilityMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }
    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();
    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources, fs.getHomeDirectory());
        paths.add(remotePath);
        localResources.put(shipFile.getName(), shipResources);
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            java.nio.file.Path shipPath = shipFile.toPath();
            final java.nio.file.Path parentPath = shipPath.getParent();
            Files.walkFileTree(shipPath, new SimpleFileVisitor<java.nio.file.Path>() {

                @Override
                public FileVisitResult preVisitDirectory(java.nio.file.Path dir, BasicFileAttributes attrs) throws IOException {
                    super.preVisitDirectory(dir, attrs);
                    java.nio.file.Path relativePath = parentPath.relativize(dir);
                    classPathBuilder.append(relativePath).append(File.separator).append("*").append(File.pathSeparator);
                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            // add files to the classpath
            classPathBuilder.append(shipFile.getName()).append(File.pathSeparator);
        }
        envShipFileList.append(remotePath).append(",");
    }
    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar, fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf, fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);
    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);
    // TODO: server use user main method to generate job graph
    if (jobGraph != null) {
        try {
            File fp = File.createTempFile(appId.toString(), null);
            fp.deleteOnExit();
            try (FileOutputStream output = new FileOutputStream(fp);
                ObjectOutputStream obOutput = new ObjectOutputStream(output)) {
                obOutput.writeObject(jobGraph);
            }
            LocalResource jobgraph = Records.newRecord(LocalResource.class);
            Path remoteJobGraph = Utils.setupLocalResource(fs, appId.toString(), new Path(fp.toURI()), jobgraph, fs.getHomeDirectory());
            localResources.put("job.graph", jobgraph);
            paths.add(remoteJobGraph);
            classPathBuilder.append("job.graph").append(File.pathSeparator);
        } catch (Exception e) {
            LOG.warn("Add job graph to local resource fail");
            throw e;
        }
    }
    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    // set permission for path.
    fs.setPermission(sessionFilesDir, permission);
    //To support Yarn Secure Integration Test Scenario
    //In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
    //and KRB5 configuration files. We are adding these files as container local resources for the container
    //applications (JM/TMs) to have proper secure cluster setup
    Path remoteKrb5Path = null;
    Path remoteYarnSiteXmlPath = null;
    boolean hasKrb5 = false;
    if (System.getenv("IN_TESTS") != null) {
        String krb5Config = System.getProperty("java.security.krb5.conf");
        if (krb5Config != null && krb5Config.length() != 0) {
            File krb5 = new File(krb5Config);
            LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath());
            LocalResource krb5ConfResource = Records.newRecord(LocalResource.class);
            Path krb5ConfPath = new Path(krb5.getAbsolutePath());
            remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource, fs.getHomeDirectory());
            localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);
            File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME);
            LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath());
            LocalResource yarnConfResource = Records.newRecord(LocalResource.class);
            Path yarnSitePath = new Path(f.getAbsolutePath());
            remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath, yarnConfResource, fs.getHomeDirectory());
            localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);
            hasKrb5 = true;
        }
    }
    // setup security tokens
    LocalResource keytabResource = null;
    Path remotePathKeytab = null;
    String keytab = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
    if (keytab != null) {
        LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
        keytabResource = Records.newRecord(LocalResource.class);
        Path keytabPath = new Path(keytab);
        remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource, fs.getHomeDirectory());
        localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
    }
    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5);
    if (UserGroupInformation.isSecurityEnabled() && keytab == null) {
        //set tokens only when keytab is not provided
        LOG.info("Adding delegation token to the AM container..");
        Utils.setTokensFor(amContainer, paths, conf);
    }
    amContainer.setLocalResources(localResources);
    fs.close();
    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX, flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());
    // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
    appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
    if (keytabResource != null) {
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString());
        String principal = flinkConfiguration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
    }
    //To support Yarn Secure Integration Test Scenario
    if (remoteYarnSiteXmlPath != null && remoteKrb5Path != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
        appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());
    }
    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }
    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);
    amContainer.setEnvironment(appMasterEnv);
    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);
    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }
    appContext.setApplicationName(name);
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }
    setApplicationTags(appContext);
    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);
    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster.", e);
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        LOG.debug("Application State: {}", appState);
        switch(appState) {
            case FAILED:
            //TODO: the finished state may be valid in flip-6
            case FINISHED:
            case KILLED:
                throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n" + "If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            //break ..
            case RUNNING:
                LOG.info("YARN application has been deployed successfully.");
                break loop;
            default:
                if (appState != lastAppState) {
                    LOG.info("Deploying cluster, current state " + appState);
                }
                if (System.currentTimeMillis() - startTime > 60000) {
                    LOG.info("Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");
                }
        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop " + "Flink on YARN, use the following command or a YARN web interface to stop " + "it:\nyarn application -kill " + appId + "\nPlease also note that the " + "temporary files of the YARN session in the home directoy will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
    // we're already in the shut down hook.
    }
    return report;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) ObjectOutputStream(java.io.ObjectOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) FsPermission(org.apache.hadoop.fs.permission.FsPermission) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) FileVisitResult(java.nio.file.FileVisitResult) IOException(java.io.IOException) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) URISyntaxException(java.net.URISyntaxException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) FileOutputStream(java.io.FileOutputStream) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File)
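
The deployment loop at the end is worth lifting out on its own: poll getApplicationReport until the application reaches RUNNING, and treat FAILED, FINISHED, and KILLED as deployment failures. A condensed sketch (the class name and exception type are illustrative):

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;

public class DeploymentWait {

    // Polls the RM until the application is RUNNING, or throws if it hits a
    // terminal state first.
    public static ApplicationReport waitUntilRunning(YarnClient yarnClient, ApplicationId appId) throws Exception {
        while (true) {
            ApplicationReport report = yarnClient.getApplicationReport(appId);
            YarnApplicationState state = report.getYarnApplicationState();
            switch (state) {
                case RUNNING:
                    return report;
                case FAILED:
                case FINISHED:
                case KILLED:
                    throw new IllegalStateException("Application ended in state " + state + ": " + report.getDiagnostics());
                default:
                    // NEW, NEW_SAVING, SUBMITTED or ACCEPTED: keep polling.
                    Thread.sleep(250);
            }
        }
    }
}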

Example 79 with ApplicationSubmissionContext

use of org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext in project hadoop by apache.

the class TestYarnClient method testSubmitIncorrectQueueToCapacityScheduler.

@Test(timeout = 30000)
public void testSubmitIncorrectQueueToCapacityScheduler() throws IOException {
    MiniYARNCluster cluster = new MiniYARNCluster("testMRAMTokens", 1, 1, 1);
    YarnClient rmClient = null;
    try {
        YarnConfiguration conf = new YarnConfiguration();
        conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getName());
        cluster.init(conf);
        cluster.start();
        final Configuration yarnConf = cluster.getConfig();
        rmClient = YarnClient.createYarnClient();
        rmClient.init(yarnConf);
        rmClient.start();
        YarnClientApplication newApp = rmClient.createApplication();
        ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();
        // Create launch context for app master
        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
        // set the application id
        appContext.setApplicationId(appId);
        // set the application name
        appContext.setApplicationName("test");
        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue("nonexist");
        // Set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        appContext.setAMContainerSpec(amContainer);
        appContext.setResource(Resource.newInstance(1024, 1));
        // appContext.setUnmanagedAM(unmanaged);
        // Submit the application to the applications manager
        rmClient.submitApplication(appContext);
        Assert.fail("Job submission should have thrown an exception");
    } catch (YarnException e) {
        Assert.assertTrue(e.getMessage().contains("Failed to submit"));
    } finally {
        if (rmClient != null) {
            rmClient.stop();
        }
        cluster.stop();
    }
}
Also used : CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) MiniYARNCluster(org.apache.hadoop.yarn.server.MiniYARNCluster) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) Test(org.junit.Test)
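
For contrast, the same submission skeleton pointed at a queue that does exist is accepted. A hedged happy-path sketch (the class and method names are illustrative; "default" is the queue a stock CapacityScheduler configuration defines):

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.util.Records;

public class MinimalSubmission {

    public static ApplicationId submitToDefaultQueue(YarnClient rmClient) throws Exception {
        YarnClientApplication newApp = rmClient.createApplication();
        ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();
        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
        appContext.setApplicationId(appId);
        appContext.setApplicationName("test");
        // "default" is the queue present in the out-of-the-box CapacityScheduler setup.
        appContext.setQueue("default");
        appContext.setAMContainerSpec(Records.newRecord(ContainerLaunchContext.class));
        appContext.setResource(Resource.newInstance(1024, 1));
        return rmClient.submitApplication(appContext);
    }
}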

Example 80 with ApplicationSubmissionContext

use of org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext in project hadoop by apache.

the class TestYarnClient method testCreateTimelineClientWithError.

private void testCreateTimelineClientWithError(float timelineVersion, boolean timelineServiceEnabled, boolean timelineClientBestEffort, Throwable mockErr, CreateTimelineClientErrorVerifier errVerifier) throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, timelineServiceEnabled);
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_CLIENT_BEST_EFFORT, timelineClientBestEffort);
    conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, timelineVersion);
    YarnClient client = new MockYarnClient();
    if (client instanceof YarnClientImpl) {
        YarnClientImpl impl = (YarnClientImpl) client;
        YarnClientImpl spyClient = spy(impl);
        when(spyClient.createTimelineClient()).thenThrow(mockErr);
        CreateTimelineClientErrorVerifier verifier = spy(errVerifier);
        spyClient.init(conf);
        spyClient.start();
        ApplicationSubmissionContext context = mock(ApplicationSubmissionContext.class);
        ContainerLaunchContext containerContext = mock(ContainerLaunchContext.class);
        ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
        when(containerContext.getTokens()).thenReturn(null);
        when(context.getApplicationId()).thenReturn(applicationId);
        when(spyClient.isSecurityEnabled()).thenReturn(true);
        when(context.getAMContainerSpec()).thenReturn(containerContext);
        try {
            spyClient.submitApplication(context);
        } catch (Throwable e) {
            verifier.verifyError(e);
        } finally {
            // Make sure the verifier runs with expected times
            // This is required because in case throwable is swallowed
            // and verifyError never gets the chance to run
            verify(verifier, times(verifier.getExpectedTimes())).verifyError(any(Throwable.class));
            spyClient.stop();
        }
    }
}
Also used : CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient)
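
The test hinges on one Mockito pattern: spy a real object, force a single method to throw, and then verify that the error path ran the expected number of times. A self-contained illustration of that pattern with a made-up Service class (none of these names come from Hadoop):

import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

public class SpyPatternDemo {

    // A stand-in for YarnClientImpl: start() delegates to a factory method
    // that the test wants to sabotage.
    static class Service {
        public Object createClient() {
            return new Object();
        }

        public void start() {
            createClient();
        }
    }

    public static void main(String[] args) {
        Service spyService = spy(new Service());
        // Stub the factory method on the spy so every call raises the injected error.
        doThrow(new RuntimeException("boom")).when(spyService).createClient();
        try {
            spyService.start();
        } catch (RuntimeException expected) {
            // The real test delegates to its error verifier here.
        }
        // Mirrors verify(verifier, times(...)).verifyError(...): assert the
        // sabotaged method ran exactly once.
        verify(spyService, times(1)).createClient();
    }
}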

Aggregations

ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) 86
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 46
ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext) 42
Test (org.junit.Test) 29
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 22
Resource (org.apache.hadoop.yarn.api.records.Resource) 21
IOException (java.io.IOException) 18
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId) 18
Configuration (org.apache.hadoop.conf.Configuration) 15
ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport) 15
Priority (org.apache.hadoop.yarn.api.records.Priority) 15
YarnException (org.apache.hadoop.yarn.exceptions.YarnException) 15
ByteBuffer (java.nio.ByteBuffer) 14
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) 14
SubmitApplicationRequest (org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest) 13
YarnClientApplication (org.apache.hadoop.yarn.client.api.YarnClientApplication) 13
RMAppAttemptMetrics (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics) 13
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource) 12
RMAppAttemptImpl (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl) 11
ArrayList (java.util.ArrayList) 10