Search in sources :

Example 1 with SparkSubmit

Use of org.apache.spark.deploy.SparkSubmit in the project cdap by caskdata.

The method createSubmitArguments from the class AbstractSparkSubmitter.

/**
 * Builds the full argument list used when invoking {@link SparkSubmit#main(String[])}.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the spark program
 * @param configs set of Spark configurations
 * @param resources list of resources that needs to be localized to Spark containers
 * @param jobFile the job file for Spark
 * @return an immutable list of command-line arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs, List<LocalizeResource> resources, URI jobFile) {
    ImmutableList.Builder<String> args = ImmutableList.builder();
    SparkSpecification specification = runtimeContext.getSparkSpecification();

    addMaster(configs, args);

    // Every Spark configuration entry is emitted as a "--conf key=value" pair,
    // starting with the application name, then the program configs, then the
    // submitter-provided configs.
    BiConsumer<String, String> addConf = (key, value) -> {
        args.add("--conf");
        args.add(key + "=" + value);
    };
    addConf.accept("spark.app.name", specification.getName());
    configs.forEach(addConf);
    getSubmitConf().forEach(addConf);

    // Localized resources are split into archives and plain files; each group is
    // passed as a single comma-separated value.
    String archives = Joiner.on(',').join(Iterables.transform(Iterables.filter(resources, ARCHIVE_FILTER), RESOURCE_TO_PATH));
    String files = Joiner.on(',').join(Iterables.transform(Iterables.filter(resources, Predicates.not(ARCHIVE_FILTER)), RESOURCE_TO_PATH));
    if (!archives.isEmpty()) {
        args.add("--archives");
        args.add(archives);
    }
    if (!files.isEmpty()) {
        args.add("--files");
        args.add(files);
    }

    boolean pySpark = jobFile.getPath().endsWith(".py");
    if (!pySpark) {
        // Non-python programs are launched through the CDAP main-class wrapper.
        args.add("--class");
        args.add(SparkMainWrapper.class.getName());
    } else {
        // For python, add extra py library files if any were configured.
        String pyFiles = configs.get("spark.submit.pyFiles");
        if (pyFiles != null) {
            args.add("--py-files");
            args.add(pyFiles);
        }
    }

    // Local files are passed by path; anything else keeps its full URI form.
    args.add("file".equals(jobFile.getScheme()) ? jobFile.getPath() : jobFile.toString());

    if (!pySpark) {
        // Add extra arguments for easily identifying the program from command line.
        // Arguments to user program is always coming from the runtime arguments.
        args.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
        args.add("--cdap.user.main.class=" + specification.getMainClassName());
    }
    return args.build();
}
Also used : Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SparkSpecification(co.cask.cdap.api.spark.SparkSpecification) SparkMainWrapper(co.cask.cdap.app.runtime.spark.SparkMainWrapper) Future(java.util.concurrent.Future) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) Predicates(com.google.common.base.Predicates) URI(java.net.URI) ThreadFactory(java.util.concurrent.ThreadFactory) ExecutorService(java.util.concurrent.ExecutorService) ClassLoaders(co.cask.cdap.common.lang.ClassLoaders) Function(com.google.common.base.Function) Uninterruptibles(com.google.common.util.concurrent.Uninterruptibles) Logger(org.slf4j.Logger) SparkSubmit(org.apache.spark.deploy.SparkSubmit) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) SparkRuntimeContext(co.cask.cdap.app.runtime.spark.SparkRuntimeContext) Predicate(com.google.common.base.Predicate) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) Collections(java.util.Collections) AbstractFuture(com.google.common.util.concurrent.AbstractFuture) Joiner(com.google.common.base.Joiner) SparkSpecification(co.cask.cdap.api.spark.SparkSpecification) SparkMainWrapper(co.cask.cdap.app.runtime.spark.SparkMainWrapper) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

SparkSpecification (co.cask.cdap.api.spark.SparkSpecification)1 SparkMainWrapper (co.cask.cdap.app.runtime.spark.SparkMainWrapper)1 SparkRuntimeContext (co.cask.cdap.app.runtime.spark.SparkRuntimeContext)1 ClassLoaders (co.cask.cdap.common.lang.ClassLoaders)1 LocalizeResource (co.cask.cdap.internal.app.runtime.distributed.LocalizeResource)1 Function (com.google.common.base.Function)1 Joiner (com.google.common.base.Joiner)1 Predicate (com.google.common.base.Predicate)1 Predicates (com.google.common.base.Predicates)1 ImmutableList (com.google.common.collect.ImmutableList)1 Iterables (com.google.common.collect.Iterables)1 AbstractFuture (com.google.common.util.concurrent.AbstractFuture)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 Uninterruptibles (com.google.common.util.concurrent.Uninterruptibles)1 URI (java.net.URI)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 CountDownLatch (java.util.concurrent.CountDownLatch)1