Search in sources :

Example 1 with SparkRuntimeContext

Example use of io.cdap.cdap.app.runtime.spark.SparkRuntimeContext in the project cdap by caskdata.

From the class AbstractSparkSubmitter, method createSubmitArguments.

/**
 * Creates the list of arguments that will be used for calling {@link SparkSubmit#main(String[])}.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the spark program
 * @param configs set of Spark configurations; this map is not modified by this method
 * @param resources list of resources that needs to be localized to Spark containers
 * @param jobFile the job file for Spark
 * @return a list of arguments
 * @throws Exception if there is error while creating submit arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs, List<LocalizeResource> resources, URI jobFile) throws Exception {
    SparkSpecification spec = runtimeContext.getSparkSpecification();
    ImmutableList.Builder<String> builder = ImmutableList.builder();

    // The --master argument (and any related settings) depends on the submitter implementation.
    addMaster(configs, builder);
    builder.add("--conf").add("spark.app.name=" + spec.getName());

    // Merge the generated submit configs on top of the caller-provided ones. The original
    // implementation called configs.putAll(...), which leaked the generated entries back into
    // the caller's map; merging into a local copy avoids that side effect while keeping the
    // same override semantics (generated values win over caller values).
    Map<String, String> allConfigs = new java.util.LinkedHashMap<>(configs);
    allConfigs.putAll(generateSubmitConf());
    allConfigs.forEach((k, v) -> builder.add("--conf").add(k + "=" + v));

    // Localized resources: archives are extracted in the container, files are copied as-is.
    String archives = Joiner.on(',').join(Iterables.transform(getArchives(resources), RESOURCE_TO_PATH));
    String files = Joiner.on(',').join(Iterables.transform(getFiles(resources), RESOURCE_TO_PATH));
    if (!Strings.isNullOrEmpty(archives)) {
        builder.add("--archives").add(archives);
    }
    if (!Strings.isNullOrEmpty(files)) {
        builder.add("--files").add(files);
    }

    // A subclass may supply a replacement job file (e.g. a localized copy); prefer it if present.
    URI newJobFile = getJobFile();
    if (newJobFile != null) {
        jobFile = newJobFile;
    }

    boolean isPySpark = jobFile.getPath().endsWith(".py");
    if (isPySpark) {
        // For python, add extra py library files
        String pyFiles = allConfigs.get("spark.submit.pyFiles");
        if (pyFiles != null) {
            builder.add("--py-files").add(pyFiles);
        }
    } else {
        // For JVM programs, the wrapper class bootstraps the user's Spark program.
        builder.add("--class").add(SparkMainWrapper.class.getName());
    }

    // Local files are passed to spark-submit by path; everything else by full URI.
    if ("file".equals(jobFile.getScheme())) {
        builder.add(jobFile.getPath());
    } else {
        builder.add(jobFile.toString());
    }

    if (!isPySpark) {
        // Add extra arguments for easily identifying the program from command line.
        // Arguments to user program is always coming from the runtime arguments.
        builder.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
        builder.add("--cdap.user.main.class=" + spec.getMainClassName());
    }
    return builder.build();
}
Also used : ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SparkMainWrapper(io.cdap.cdap.app.runtime.spark.SparkMainWrapper) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Strings(com.google.common.base.Strings) Future(java.util.concurrent.Future) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) Predicates(com.google.common.base.Predicates) URI(java.net.URI) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Function(com.google.common.base.Function) Uninterruptibles(com.google.common.util.concurrent.Uninterruptibles) SparkRuntimeContext(io.cdap.cdap.app.runtime.spark.SparkRuntimeContext) Logger(org.slf4j.Logger) ClassLoaders(io.cdap.cdap.common.lang.ClassLoaders) SparkSubmit(org.apache.spark.deploy.SparkSubmit) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Predicate(com.google.common.base.Predicate) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) Collections(java.util.Collections) AbstractFuture(com.google.common.util.concurrent.AbstractFuture) Joiner(com.google.common.base.Joiner) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) SparkMainWrapper(io.cdap.cdap.app.runtime.spark.SparkMainWrapper) ImmutableList(com.google.common.collect.ImmutableList) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) URI(java.net.URI)

Aggregations

Function (com.google.common.base.Function)1 Joiner (com.google.common.base.Joiner)1 Predicate (com.google.common.base.Predicate)1 Predicates (com.google.common.base.Predicates)1 Strings (com.google.common.base.Strings)1 ImmutableList (com.google.common.collect.ImmutableList)1 Iterables (com.google.common.collect.Iterables)1 AbstractFuture (com.google.common.util.concurrent.AbstractFuture)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 Uninterruptibles (com.google.common.util.concurrent.Uninterruptibles)1 SparkSpecification (io.cdap.cdap.api.spark.SparkSpecification)1 SparkMainWrapper (io.cdap.cdap.app.runtime.spark.SparkMainWrapper)1 SparkRuntimeContext (io.cdap.cdap.app.runtime.spark.SparkRuntimeContext)1 ClassLoaders (io.cdap.cdap.common.lang.ClassLoaders)1 LocalizeResource (io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource)1 URI (java.net.URI)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1