Example 1 with ClientExt

Use of com.dtstack.taier.sparkyarn.sparkext.ClientExt in project Taier by DTStack.

From class SparkYarnClient, method submitSparkSqlJobForBatch:

/**
 * Submit a Spark batch SQL job to YARN.
 * @param jobClient the job carrying the SQL text, job name, and conf properties
 * @return the submission result, holding the YARN ApplicationId on success
 */
private JobResult submitSparkSqlJobForBatch(JobClient jobClient) {
    Properties confProp = jobClient.getConfProperties();
    setHadoopUserName(sparkYarnConfig);
    Map<String, Object> paramsMap = new HashMap<>();
    String zipSql = DtStringUtil.zip(jobClient.getSql());
    paramsMap.put("sql", zipSql);
    paramsMap.put("appName", jobClient.getJobName());
    paramsMap.put("sparkSessionConf", getSparkSessionConf(confProp));
    String logLevel = MathUtil.getString(confProp.get(LOG_LEVEL_KEY));
    if (StringUtils.isNotEmpty(logLevel)) {
        paramsMap.put("logLevel", logLevel);
    }
    String sqlExeJson = null;
    try {
        sqlExeJson = PublicUtil.objToString(paramsMap);
        sqlExeJson = URLEncoder.encode(sqlExeJson, Charsets.UTF_8.name());
    } catch (Exception e) {
        logger.error("serialize spark sql job params failed", e);
        throw new PluginDefineException("get unexpected exception:" + e.getMessage());
    }
    String sqlProxyClass = sparkYarnConfig.getSparkSqlProxyMainClass();
    List<String> argList = new ArrayList<>();
    argList.add("--jar");
    argList.add(sparkYarnConfig.getSparkSqlProxyPath());
    argList.add("--class");
    argList.add(sqlProxyClass);
    argList.add("--arg");
    argList.add(sqlExeJson);
    ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()]));
    SparkConf sparkConf = buildBasicSparkConf(jobClient);
    sparkConf.setAppName(jobClient.getJobName());
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, confProp);
    setSparkLog4jConfiguration(sparkConf);
    ApplicationId appId = null;
    try {
        ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            logger.info("ugi proxyUser is {}", proxyUserName);
            appId = UserGroupInformation
                    .createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () ->
                            clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Also used : ClientExt(com.dtstack.taier.sparkyarn.sparkext.ClientExt) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IOException(java.io.IOException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) ClientArguments(org.apache.spark.deploy.yarn.ClientArguments) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SparkConf(org.apache.spark.SparkConf)
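
The SQL reaches the driver as a single --arg string: the params map is serialized to JSON via PublicUtil.objToString and URL-encoded, with the SQL itself pre-compressed by DtStringUtil.zip. Below is a minimal sketch of the reverse steps the SQL proxy main class would need to perform, assuming DtStringUtil.zip is Base64-wrapped GZIP; the real codec lives in Taier's DtStringUtil, and SqlPayloadDecoder, urlDecode, and unzip are hypothetical names.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;

public class SqlPayloadDecoder {

    // Reverse of the URLEncoder.encode(...) applied at submit time; the result
    // is the JSON params map, whose "sql" field is still zipped.
    public static String urlDecode(String sqlExeJson) throws Exception {
        return URLDecoder.decode(sqlExeJson, StandardCharsets.UTF_8.name());
    }

    // Hypothetical inverse of DtStringUtil.zip: Base64-decode, then gunzip.
    public static String unzip(String zipped) throws Exception {
        byte[] raw = Base64.getDecoder().decode(zipped);
        try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(raw));
                ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) > 0) {
                out.write(buf, 0, n);
            }
            return out.toString(StandardCharsets.UTF_8.name());
        }
    }
}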

Example 2 with ClientExt

Use of com.dtstack.taier.sparkyarn.sparkext.ClientExt in project Taier by DTStack.

From class SparkYarnClient, method submitJobWithJar:

private JobResult submitJobWithJar(JobClient jobClient) {
    setHadoopUserName(sparkYarnConfig);
    JobParam jobParam = new JobParam(jobClient);
    String mainClass = jobParam.getMainClass();
    // only HDFS paths are supported
    String jarPath = jobParam.getJarPath();
    String appName = jobParam.getJobName();
    String exeArgsStr = jobParam.getClassArgs();
    if (!jarPath.startsWith(HDFS_PREFIX)) {
        throw new PluginDefineException("spark jar path protocol must be " + HDFS_PREFIX);
    }
    if (Strings.isNullOrEmpty(appName)) {
        throw new PluginDefineException("spark jar must set app name!");
    }
    String[] appArgs = new String[] {};
    if (StringUtils.isNotBlank(exeArgsStr)) {
        appArgs = exeArgsStr.split("\\s+");
    }
    List<String> argList = new ArrayList<>();
    argList.add("--jar");
    argList.add(jarPath);
    argList.add("--class");
    argList.add(mainClass);
    for (String appArg : appArgs) {
        if (StringUtils.isBlank(appArg)) {
            continue;
        }
        argList.add("--arg");
        argList.add(appArg);
    }
    ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()]));
    SparkConf sparkConf = buildBasicSparkConf(jobClient);
    sparkConf.setAppName(appName);
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, jobClient.getConfProperties());
    setSparkLog4jConfiguration(sparkConf);
    ApplicationId appId = null;
    try {
        ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            logger.info("jobId {} ugi proxyUser is {}", jobClient.getJobId(), proxyUserName);
            appId = UserGroupInformation
                    .createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () ->
                            clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        logger.error("submit spark jar job failed", ex);
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Also used : ClientExt(com.dtstack.taier.sparkyarn.sparkext.ClientExt) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) ClientArguments(org.apache.spark.deploy.yarn.ClientArguments) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SparkConf(org.apache.spark.SparkConf) IOException(java.io.IOException)
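
All three submit paths in this class share the same impersonation step. The sketch below isolates that pattern, assuming the login user is authorized to impersonate in core-site.xml (hadoop.proxyuser.<user>.hosts/groups); ProxySubmit and runAs are illustrative names, and a Callable stands in for clientExt.submitApplication(...).

import java.security.PrivilegedExceptionAction;
import java.util.concurrent.Callable;
import org.apache.hadoop.security.UserGroupInformation;

public final class ProxySubmit {

    // Run submitFn as proxyUser when one is configured, else directly.
    public static <T> T runAs(String proxyUser, Callable<T> submitFn) throws Exception {
        if (proxyUser == null || proxyUser.trim().isEmpty()) {
            return submitFn.call();
        }
        // Impersonate proxyUser on top of the current login user;
        // YARN then records the application as owned by proxyUser.
        UserGroupInformation ugi =
                UserGroupInformation.createProxyUser(proxyUser, UserGroupInformation.getLoginUser());
        return ugi.doAs((PrivilegedExceptionAction<T>) () -> submitFn.call());
    }
}

With this helper, the if/else branches above would collapse to: appId = ProxySubmit.runAs(proxyUserName, () -> clientExt.submitApplication(jobClient.getApplicationPriority())).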

Example 3 with ClientExt

Use of com.dtstack.taier.sparkyarn.sparkext.ClientExt in project Taier by DTStack.

From class SparkYarnClient, method submitPythonJob:

private JobResult submitPythonJob(JobClient jobClient) {
    setHadoopUserName(sparkYarnConfig);
    JobParam jobParam = new JobParam(jobClient);
    // HDFS path where the .py/.egg/.zip file is stored
    String pyFilePath = jobParam.getJarPath();
    String appName = jobParam.getJobName();
    String exeArgsStr = jobParam.getClassArgs();
    if (Strings.isNullOrEmpty(pyFilePath)) {
        return JobResult.createErrorResult("the python file to execute can't be null.");
    }
    if (Strings.isNullOrEmpty(appName)) {
        return JobResult.createErrorResult("an application name must be set in your configuration");
    }
    ApplicationId appId = null;
    List<String> argList = new ArrayList<>();
    argList.add("--primary-py-file");
    argList.add(pyFilePath);
    argList.add("--class");
    argList.add(PYTHON_RUNNER_CLASS);
    String[] appArgs = new String[] {};
    if (StringUtils.isNotBlank(exeArgsStr)) {
        appArgs = exeArgsStr.split("\\s+");
    }
    String dependencyResource = "";
    boolean nextIsDependencyVal = false;
    for (String appArg : appArgs) {
        if (nextIsDependencyVal) {
            dependencyResource = appArg;
            nextIsDependencyVal = false;
            continue;
        }
        if (PYTHON_RUNNER_DEPENDENCY_RES_KEY.equals(appArg)) {
            nextIsDependencyVal = true;
            continue;
        }
        argList.add("--arg");
        argList.add(appArg);
        nextIsDependencyVal = false;
    }
    String pythonExtPath = sparkYarnConfig.getSparkPythonExtLibPath();
    if (Strings.isNullOrEmpty(pythonExtPath)) {
        return JobResult.createErrorResult("engine node.yml setting error: submitting a spark python job requires the sparkPythonExtLibPath param.");
    }
    // append user-supplied dependency packages
    if (!Strings.isNullOrEmpty(dependencyResource)) {
        pythonExtPath = pythonExtPath + "," + dependencyResource;
    }
    SparkConf sparkConf = buildBasicSparkConf(jobClient);
    // set spark executor env.
    List<String> args = Arrays.asList(appArgs);
    int appEnvIndex = args.indexOf(AppEnvConstant.APP_ENV);
    if (appEnvIndex != -1) {
        try {
            String appEnv = args.get(appEnvIndex + 1);
            parseAppEnv(appEnv, sparkConf);
        } catch (Exception e) {
            return JobResult.createErrorResult("Couldn't set appEnv to spark executor env, parseAppEnv failed. Reason: " + e.getMessage());
        }
    }
    sparkConf.set("spark.submit.pyFiles", pythonExtPath);
    sparkConf.setAppName(appName);
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, jobClient.getConfProperties());
    setSparkLog4jConfiguration(sparkConf);
    try {
        ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[argList.size()]));
        ClientExt clientExt = new ClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            logger.info("ugi proxyUser is {}", proxyUserName);
            appId = UserGroupInformation
                    .createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () ->
                            clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        logger.error("submit spark python job failed", ex);
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Also used : ClientExt(com.dtstack.taier.sparkyarn.sparkext.ClientExt) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IOException(java.io.IOException) PluginDefineException(com.dtstack.taier.pluginapi.exception.PluginDefineException) ClientArguments(org.apache.spark.deploy.yarn.ClientArguments) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) SparkConf(org.apache.spark.SparkConf)
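
The flag scan above is worth isolating: it captures the token that follows PYTHON_RUNNER_DEPENDENCY_RES_KEY and forwards every other token as a --arg pair. Below is a self-contained sketch of the same logic (ArgScan and extractDependency are illustrative names); note the caveat carried over from the original, that exeArgsStr.split("\\s+") tokenizes on whitespace, so quoted arguments containing spaces are split apart.

import java.util.ArrayList;
import java.util.List;

public class ArgScan {

    // Scan appArgs once: capture the value following depKey, and forward
    // every other token to argList as a "--arg <token>" pair.
    public static String extractDependency(String[] appArgs, List<String> argList, String depKey) {
        String dependencyResource = "";
        boolean nextIsDependencyVal = false;
        for (String appArg : appArgs) {
            if (nextIsDependencyVal) {
                dependencyResource = appArg;
                nextIsDependencyVal = false;
                continue;
            }
            if (depKey.equals(appArg)) {
                nextIsDependencyVal = true;
                continue;
            }
            argList.add("--arg");
            argList.add(appArg);
        }
        return dependencyResource;
    }
}

The returned path is what submitPythonJob appends to sparkPythonExtLibPath before setting spark.submit.pyFiles.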

Aggregations

PluginDefineException (com.dtstack.taier.pluginapi.exception.PluginDefineException): 3 uses
ClientExt (com.dtstack.taier.sparkyarn.sparkext.ClientExt): 3 uses
IOException (java.io.IOException): 3 uses
PrivilegedExceptionAction (java.security.PrivilegedExceptionAction): 3 uses
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 3 uses
SparkConf (org.apache.spark.SparkConf): 3 uses
ClientArguments (org.apache.spark.deploy.yarn.ClientArguments): 3 uses