Usage of com.dtstack.taier.sparkyarn.sparkext.ClientExt in the Taier project by DTStack.
Class SparkYarnClient, method submitSparkSqlJobForBatch.
/**
 * Submit a Spark batch SQL job to YARN.
 *
 * <p>The SQL text and session settings are packed into a single URL-encoded JSON
 * string and handed to the SQL proxy main class as a {@code --arg} parameter;
 * the proxy jar is then submitted through {@code ClientExt}, optionally under a
 * proxy user via {@link UserGroupInformation#createProxyUser}.
 *
 * @param jobClient job description: SQL text, job name, configuration properties
 * @return a success result carrying the YARN application id, or an error result
 *         describing the submission failure
 * @throws PluginDefineException if the job parameters cannot be serialized/encoded
 */
private JobResult submitSparkSqlJobForBatch(JobClient jobClient) {
    Properties confProp = jobClient.getConfProperties();
    setHadoopUserName(sparkYarnConfig);

    // Build the parameter map consumed by the SQL proxy: the SQL is compressed
    // to keep the command-line argument small.
    Map<String, Object> paramsMap = new HashMap<>();
    String zipSql = DtStringUtil.zip(jobClient.getSql());
    paramsMap.put("sql", zipSql);
    paramsMap.put("appName", jobClient.getJobName());
    paramsMap.put("sparkSessionConf", getSparkSessionConf(confProp));

    String logLevel = MathUtil.getString(confProp.get(LOG_LEVEL_KEY));
    if (StringUtils.isNotEmpty(logLevel)) {
        paramsMap.put("logLevel", logLevel);
    }

    String sqlExeJson = null;
    try {
        sqlExeJson = PublicUtil.objToString(paramsMap);
        // URL-encode so the JSON survives being passed as a CLI argument.
        sqlExeJson = URLEncoder.encode(sqlExeJson, Charsets.UTF_8.name());
    } catch (Exception e) {
        logger.error("serialize spark sql job params failed", e);
        throw new PluginDefineException("get unexpected exception:" + e.getMessage());
    }

    String sqlProxyClass = sparkYarnConfig.getSparkSqlProxyMainClass();

    List<String> argList = new ArrayList<>();
    argList.add("--jar");
    argList.add(sparkYarnConfig.getSparkSqlProxyPath());
    argList.add("--class");
    argList.add(sqlProxyClass);
    argList.add("--arg");
    argList.add(sqlExeJson);

    ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[0]));
    SparkConf sparkConf = buildBasicSparkConf(jobClient);
    sparkConf.setAppName(jobClient.getJobName());
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, confProp);
    setSparkLog4jConfiguration(sparkConf);

    ApplicationId appId = null;
    try {
        ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            // Submit on behalf of the configured proxy user (Hadoop impersonation).
            logger.info("ugi proxyUser is {}", proxyUserName);
            appId = UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () -> clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        // Log before returning: the other submit paths (jar/python) record the
        // failure, and a silent error result makes YARN issues hard to trace.
        logger.error("submit spark sql job error", ex);
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Usage of com.dtstack.taier.sparkyarn.sparkext.ClientExt in the Taier project by DTStack.
Class SparkYarnClient, method submitJobWithJar.
/**
 * Submit a Spark job packaged as a jar to YARN.
 *
 * <p>Validates that the jar lives on HDFS and that an application name is set,
 * splits the class arguments on whitespace into {@code --arg} entries, and
 * submits through {@code ClientExt}, optionally under a proxy user.
 *
 * @param jobClient job description: jar path, main class, app name, class args
 * @return a success result carrying the YARN application id, or an error result
 * @throws PluginDefineException if the jar path is not on HDFS or the app name is missing
 */
private JobResult submitJobWithJar(JobClient jobClient) {
    setHadoopUserName(sparkYarnConfig);
    JobParam jobParam = new JobParam(jobClient);
    String mainClass = jobParam.getMainClass();
    // Only HDFS-hosted jars are supported.
    String jarPath = jobParam.getJarPath();
    String appName = jobParam.getJobName();
    String exeArgsStr = jobParam.getClassArgs();

    if (!jarPath.startsWith(HDFS_PREFIX)) {
        throw new PluginDefineException("spark jar path protocol must be " + HDFS_PREFIX);
    }
    if (Strings.isNullOrEmpty(appName)) {
        throw new PluginDefineException("spark jar must set app name!");
    }

    // Split user-supplied arguments on whitespace; blank tokens are skipped below.
    String[] appArgs = new String[]{};
    if (StringUtils.isNotBlank(exeArgsStr)) {
        appArgs = exeArgsStr.split("\\s+");
    }

    List<String> argList = new ArrayList<>();
    argList.add("--jar");
    argList.add(jarPath);
    argList.add("--class");
    argList.add(mainClass);
    for (String appArg : appArgs) {
        if (StringUtils.isBlank(appArg)) {
            continue;
        }
        argList.add("--arg");
        argList.add(appArg);
    }

    ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[0]));
    SparkConf sparkConf = buildBasicSparkConf(jobClient);
    sparkConf.setAppName(appName);
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, jobClient.getConfProperties());
    setSparkLog4jConfiguration(sparkConf);

    ApplicationId appId = null;
    try {
        ClientExt clientExt = ClientExtFactory.getClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            // Submit on behalf of the configured proxy user (Hadoop impersonation).
            logger.info("jobId {} ugi proxyUser is {}", jobClient.getJobId(), proxyUserName);
            appId = UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () -> clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        // A submission failure is an error, not routine info; keep the context
        // (jobId) in the message rather than logging with an empty string.
        logger.error("jobId {} submit job with jar error", jobClient.getJobId(), ex);
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Usage of com.dtstack.taier.sparkyarn.sparkext.ClientExt in the Taier project by DTStack.
Class SparkYarnClient, method submitPythonJob.
/**
 * Submit a PySpark job to YARN.
 *
 * <p>The primary .py/.egg/.zip file is expected on HDFS. User arguments are
 * split on whitespace; a {@code PYTHON_RUNNER_DEPENDENCY_RES_KEY} flag and its
 * value are extracted as an extra dependency resource instead of being passed
 * through, and an optional {@code AppEnvConstant.APP_ENV} value is parsed into
 * Spark executor environment settings.
 *
 * @param jobClient job description: python file path, app name, class args
 * @return a success result carrying the YARN application id, or an error result
 */
private JobResult submitPythonJob(JobClient jobClient) {
    setHadoopUserName(sparkYarnConfig);
    JobParam jobParam = new JobParam(jobClient);
    // HDFS path of the .py / .egg / .zip primary resource.
    String pyFilePath = jobParam.getJarPath();
    String appName = jobParam.getJobName();
    String exeArgsStr = jobParam.getClassArgs();

    if (Strings.isNullOrEmpty(pyFilePath)) {
        return JobResult.createErrorResult("exe python file can't be null.");
    }
    if (Strings.isNullOrEmpty(appName)) {
        return JobResult.createErrorResult("an application name must be set in your configuration");
    }

    ApplicationId appId = null;

    List<String> argList = new ArrayList<>();
    argList.add("--primary-py-file");
    argList.add(pyFilePath);
    argList.add("--class");
    argList.add(PYTHON_RUNNER_CLASS);

    String[] appArgs = new String[]{};
    if (StringUtils.isNotBlank(exeArgsStr)) {
        appArgs = exeArgsStr.split("\\s+");
    }

    // Extract the dependency-resource flag and its value from the argument
    // stream; everything else is forwarded as a --arg pair.
    String dependencyResource = "";
    boolean nextIsDependencyVal = false;
    for (String appArg : appArgs) {
        if (nextIsDependencyVal) {
            dependencyResource = appArg;
            nextIsDependencyVal = false;
            continue;
        }
        if (PYTHON_RUNNER_DEPENDENCY_RES_KEY.equals(appArg)) {
            nextIsDependencyVal = true;
            continue;
        }
        argList.add("--arg");
        argList.add(appArg);
        nextIsDependencyVal = false;
    }

    String pythonExtPath = sparkYarnConfig.getSparkPythonExtLibPath();
    if (Strings.isNullOrEmpty(pythonExtPath)) {
        return JobResult.createErrorResult("engine node.yml setting error, " +
                "commit spark python job need to set param of sparkPythonExtLibPath.");
    }
    // Append user-supplied dependency resources to the python ext libs.
    if (!Strings.isNullOrEmpty(dependencyResource)) {
        pythonExtPath = pythonExtPath + "," + dependencyResource;
    }

    SparkConf sparkConf = buildBasicSparkConf(jobClient);

    // Set spark executor env from an optional APP_ENV argument.
    List<String> args = Arrays.asList(appArgs);
    int appEnvIndex = args.indexOf(AppEnvConstant.APP_ENV);
    if (appEnvIndex != -1) {
        try {
            // Guard explicitly: APP_ENV as the final token has no value, which
            // previously surfaced only as an IndexOutOfBoundsException.
            if (appEnvIndex + 1 >= args.size()) {
                throw new IllegalArgumentException("missing value after " + AppEnvConstant.APP_ENV);
            }
            String appEnv = args.get(appEnvIndex + 1);
            parseAppEnv(appEnv, sparkConf);
        } catch (Exception e) {
            return JobResult.createErrorResult("Could't set appEnv to spark executor env. parsePythonCmd failed. " +
                    "Reason :" + e.getMessage());
        }
    }

    sparkConf.set("spark.submit.pyFiles", pythonExtPath);
    sparkConf.setAppName(appName);
    setSparkLog4jLocalFilePath(sparkConf, jobClient);
    fillExtSparkConf(sparkConf, jobClient.getConfProperties());
    setSparkLog4jConfiguration(sparkConf);

    try {
        ClientArguments clientArguments = new ClientArguments(argList.toArray(new String[0]));
        // NOTE(review): this path instantiates ClientExt directly while the sql/jar
        // submit paths go through ClientExtFactory.getClientExt — confirm whether
        // the factory (e.g. kerberos-aware variant) should be used here too.
        ClientExt clientExt = new ClientExt(filesystemManager, clientArguments, yarnConf, sparkConf);
        clientExt.setSparkYarnConfig(sparkYarnConfig);
        String proxyUserName = sparkYarnConfig.getDtProxyUserName();
        if (StringUtils.isNotBlank(proxyUserName)) {
            // Submit on behalf of the configured proxy user (Hadoop impersonation).
            logger.info("ugi proxyUser is {}", proxyUserName);
            appId = UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser())
                    .doAs((PrivilegedExceptionAction<ApplicationId>) () -> clientExt.submitApplication(jobClient.getApplicationPriority()));
        } else {
            appId = clientExt.submitApplication(jobClient.getApplicationPriority());
        }
        return JobResult.createSuccessResult(appId.toString());
    } catch (Exception ex) {
        // A submission failure is an error, not routine info, and deserves a message.
        logger.error("submit python job error", ex);
        return JobResult.createErrorResult("submit job get unknown error\n" + ExceptionUtil.getErrorMessage(ex));
    }
}
Aggregations