Use of io.cdap.cdap.app.runtime.spark.SparkRuntimeContext in project cdap by caskdata.
From the class AbstractSparkSubmitter, method createSubmitArguments.
/**
 * Builds the command line arguments that are handed to {@link SparkSubmit#main(String[])}.
 *
 * <p>The arguments are emitted in this order: whatever {@code addMaster} contributes, the
 * {@code spark.app.name} configuration, one {@code --conf key=value} pair per entry of
 * {@code configs} (after the entries from {@code generateSubmitConf()} have been merged in),
 * the optional {@code --archives} and {@code --files} lists, either {@code --py-files} or
 * {@code --class}, the job file itself, and finally, for non-Python jobs only, two
 * {@code --cdap.*} identification arguments.
 *
 * <p>Note that this method adds the result of {@code generateSubmitConf()} to the given
 * {@code configs} map, so the map passed in must be mutable and is modified by this call.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} of the Spark program; supplies the
 *                       {@link SparkSpecification} and the program run id
 * @param configs Spark configurations to pass via {@code --conf}; modified in place as described above
 * @param resources resources to localize to Spark containers, split into archives and plain files
 * @param jobFile the job file for Spark; replaced by {@code getJobFile()} when that returns non-null
 * @return an immutable list of arguments
 * @throws Exception if creating the submit arguments fails
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs,
                                           List<LocalizeResource> resources, URI jobFile) throws Exception {
  SparkSpecification sparkSpec = runtimeContext.getSparkSpecification();
  ImmutableList.Builder<String> args = ImmutableList.builder();
  Iterable<LocalizeResource> archiveResources = getArchives(resources);
  Iterable<LocalizeResource> fileResources = getFiles(resources);

  addMaster(configs, args);
  args.add("--conf", "spark.app.name=" + sparkSpec.getName());

  // Merge the submitter specific configurations before emitting every entry as a --conf pair.
  configs.putAll(generateSubmitConf());
  configs.forEach((key, value) -> args.add("--conf", key + "=" + value));

  // Comma separated resource paths; each option is only added when there is something to list.
  String archivePaths = Joiner.on(',').join(Iterables.transform(archiveResources, RESOURCE_TO_PATH));
  String filePaths = Joiner.on(',').join(Iterables.transform(fileResources, RESOURCE_TO_PATH));
  if (!Strings.isNullOrEmpty(archivePaths)) {
    args.add("--archives", archivePaths);
  }
  if (!Strings.isNullOrEmpty(filePaths)) {
    args.add("--files", filePaths);
  }

  // A non-null job file from getJobFile() takes precedence over the one given by the caller.
  URI overrideJobFile = getJobFile();
  URI effectiveJobFile = overrideJobFile == null ? jobFile : overrideJobFile;
  String jobFilePath = effectiveJobFile.getPath();
  boolean pythonJob = jobFilePath.endsWith(".py");

  if (!pythonJob) {
    args.add("--class", SparkMainWrapper.class.getName());
  } else {
    // For python, add extra py library files
    String pyFiles = configs.get("spark.submit.pyFiles");
    if (pyFiles != null) {
      args.add("--py-files", pyFiles);
    }
  }

  // Job files with the "file" scheme are passed as a plain path, anything else as the full URI.
  args.add("file".equals(effectiveJobFile.getScheme()) ? jobFilePath : effectiveJobFile.toString());

  if (pythonJob) {
    return args.build();
  }

  // Extra arguments that make the program easy to identify from the command line.
  // Arguments to the user program always come from the runtime arguments.
  args.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
  args.add("--cdap.user.main.class=" + sparkSpec.getMainClassName());
  return args.build();
}
Aggregations