Examples with SparkSpecification - io.cdap.cdap.api.spark.SparkSpecification

Example 1 with SparkSpecification

Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.

Method addSpark of the class DefaultAppConfigurer.

/**
 * Configures the given {@link Spark} program and registers the resulting
 * {@link SparkSpecification} under its name.
 *
 * @param spark the Spark program to add; must not be null
 * @throws IllegalArgumentException if {@code spark} is null
 */
@Override
public void addSpark(Spark spark) {
    Preconditions.checkArgument(spark != null, "Spark cannot be null.");

    // The extended configurer is looked up by name through the SparkRunnerClassLoader and built
    // reflectively. This direct lookup is admittedly a bit hacky (CDAP-11797).
    ClassLoader runnerLoader = ClassLoaders.findByName(
        spark.getClass().getClassLoader(),
        "io.cdap.cdap.app.runtime.spark.classloader.SparkRunnerClassLoader");

    DefaultSparkConfigurer sparkConfigurer = null;
    if (runnerLoader != null) {
        try {
            Class<?> extendedType = runnerLoader.loadClass("io.cdap.cdap.app.deploy.spark.DefaultExtendedSparkConfigurer");
            Object extended = extendedType
                .getConstructor(Spark.class, Id.Namespace.class, Id.Artifact.class, PluginFinder.class,
                                PluginInstantiator.class, AppDeploymentRuntimeInfo.class, FeatureFlagsProvider.class)
                .newInstance(spark, deployNamespace, artifactId, pluginFinder, pluginInstantiator, runtimeInfo,
                             getFeatureFlagsProvider());
            sparkConfigurer = (DefaultSparkConfigurer) extended;
        } catch (Exception e) {
            // Deliberately best-effort: any failure here leaves the configurer unset so the default is used.
            LOG.trace("No DefaultExtendedSparkConfigurer found. Fallback to DefaultSparkConfigurer.", e);
        }
    }

    // Either no runner class loader was found or the reflective construction failed.
    if (sparkConfigurer == null) {
        sparkConfigurer = new DefaultSparkConfigurer(spark, deployNamespace, artifactId, pluginFinder,
                                                     pluginInstantiator, runtimeInfo, getFeatureFlagsProvider());
    }

    spark.configure(sparkConfigurer);
    addDatasetsAndPlugins(sparkConfigurer);

    SparkSpecification sparkSpec = sparkConfigurer.createSpecification();
    sparks.put(sparkSpec.getName(), sparkSpec);
}

Also used : SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) DefaultSparkConfigurer(io.cdap.cdap.internal.app.spark.DefaultSparkConfigurer) PluginFinder(io.cdap.cdap.internal.app.runtime.artifact.PluginFinder) PluginInstantiator(io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator) Spark(io.cdap.cdap.api.spark.Spark) FeatureFlagsProvider(io.cdap.cdap.api.feature.FeatureFlagsProvider) AppDeploymentRuntimeInfo(io.cdap.cdap.internal.app.deploy.pipeline.AppDeploymentRuntimeInfo)

Example 2 with SparkSpecification

Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.

Method generateSpec of the class DataStreamsPipelineSpecGenerator.

/**
 * Builds the {@link DataStreamsPipelineSpec} for the given pipeline configuration.
 *
 * <p>The pipeline id is taken from the already deployed application specification when one is
 * available and carries a usable id; otherwise a freshly generated random UUID is used.
 *
 * @param config the data streams pipeline configuration
 * @return the generated pipeline specification
 * @throws IllegalArgumentException if the batch interval cannot be parsed, or if checkpoints are enabled
 *     and the configured checkpoint directory is not a valid {@link Path}
 * @throws ValidationException declared by this method; presumably raised while configuring stages
 */
@Override
public DataStreamsPipelineSpec generateSpec(DataStreamsConfig config) throws ValidationException {
    long batchIntervalMillis;
    try {
        batchIntervalMillis = TimeParser.parseDuration(config.getBatchInterval());
    } catch (Exception e) {
        // Chain the original exception so the underlying parse failure is not lost.
        throw new IllegalArgumentException(String.format("Unable to parse batchInterval '%s'", config.getBatchInterval()), e);
    }
    // Default to a new id; it is replaced below if the deployed application already has one.
    String pipelineId = UUID.randomUUID().toString();
    if (runtimeConfigurer != null && runtimeConfigurer.getDeployedApplicationSpec() != null) {
        SparkSpecification sparkSpec = runtimeConfigurer.getDeployedApplicationSpec().getSpark().get(DataStreamsSparkLauncher.NAME);
        // The deployed application may not contain this Spark program, and the stored property may be
        // absent (Gson returns null for null input), so guard each step instead of failing with an NPE.
        if (sparkSpec != null) {
            DataStreamsPipelineSpec deployedSpec = GSON.fromJson(sparkSpec.getProperty(Constants.PIPELINEID), DataStreamsPipelineSpec.class);
            if (deployedSpec != null && deployedSpec.getPipelineId() != null) {
                pipelineId = deployedSpec.getPipelineId();
            }
        }
    }
    DataStreamsPipelineSpec.Builder specBuilder = DataStreamsPipelineSpec.builder(batchIntervalMillis, pipelineId).setExtraJavaOpts(config.getExtraJavaOpts()).setStopGracefully(config.getStopGracefully()).setIsUnitTest(config.isUnitTest()).setCheckpointsDisabled(config.checkpointsDisabled());
    String checkpointDir = config.getCheckpointDir();
    if (!config.checkpointsDisabled() && checkpointDir != null) {
        try {
            // Constructed only to validate the directory string; the instance itself is discarded.
            new Path(checkpointDir);
        } catch (Exception e) {
            throw new IllegalArgumentException(String.format("Checkpoint directory '%s' is not a valid Path: %s", checkpointDir, e.getMessage()), e);
        }
        specBuilder.setCheckpointDirectory(checkpointDir);
    }
    configureStages(config, specBuilder);
    return specBuilder.build();
}

Also used : Path(org.apache.hadoop.fs.Path) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException)

Example 3 with SparkSpecification

Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.

Method run of the class SparkProgramRunner.

/**
 * Validates the given program, builds its Spark runtime context and submitter, starts the
 * Spark runtime service and returns a controller for it.
 *
 * <p>Every {@link Closeable} created along the way is tracked; if anything fails before the
 * controller is returned, the tracked resources are closed quietly and the failure is rethrown.
 *
 * @param program the program to run; must be of type {@link ProgramType#SPARK}
 * @param options the program options, which must carry a host name argument
 * @return the controller of the started Spark program
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
    LOG.trace("Starting Spark program {} with SparkProgramRunner of ClassLoader {}", program.getId(), getClass().getClassLoader());
    // Resolve the arguments and the RunId before creating anything that needs cleanup.
    Arguments arguments = options.getArguments();
    RunId runId = ProgramRunners.getRunId(options);
    // Tracked resources: closed on failure below, and also passed to the runtime service listener.
    Deque<Closeable> resources = new LinkedList<>();
    try {
        // Validate the program and the options
        ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification.");
        ProgramType programType = program.getType();
        Preconditions.checkNotNull(programType, "Missing processor type.");
        Preconditions.checkArgument(programType == ProgramType.SPARK, "Only Spark process type is supported.");
        SparkSpecification sparkSpec = appSpec.getSpark().get(program.getName());
        Preconditions.checkNotNull(sparkSpec, "Missing SparkSpecification for %s", program.getName());
        String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
        Preconditions.checkArgument(host != null, "No hostname is provided");

        // A non-null WorkflowProgramInfo means the dataset framework is wrapped with the workflow's name mapping
        WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
        DatasetFramework programDatasetFramework;
        if (workflowInfo == null) {
            programDatasetFramework = datasetFramework;
        } else {
            programDatasetFramework = NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
        }

        // Give the dataset framework the run-level program context when it can accept one
        if (programDatasetFramework instanceof ProgramContextAware) {
            ProgramContextAware contextAware = (ProgramContextAware) programDatasetFramework;
            contextAware.setContext(new BasicProgramContext(program.getId().run(runId)));
        }

        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            resources.addFirst(pluginInstantiator);
        }

        SparkRuntimeContext runtimeContext = new SparkRuntimeContext(
            new Configuration(hConf), program, options, cConf, host, txClient, programDatasetFramework,
            metricsCollectionService, workflowInfo, pluginInstantiator, secureStore, secureStoreManager,
            accessEnforcer, authenticationContext, messagingService, serviceAnnouncer, pluginFinder,
            locationFactory, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter,
            remoteClientFactory, () -> {
            });
        resources.addFirst(runtimeContext);

        Spark spark;
        try {
            spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
        } catch (Exception e) {
            LOG.error("Failed to instantiate Spark class for {}", sparkSpec.getClassName(), e);
            throw Throwables.propagate(e);
        }

        boolean isLocal = SparkRuntimeContextConfig.isLocal(options);
        // Pick the submitter: master environment first (when present and enabled), then local, then distributed
        MasterEnvironment masterEnv = MasterEnvironments.getMasterEnvironment();
        SparkSubmitter submitter;
        if (masterEnv != null && cConf.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
            submitter = new MasterEnvironmentSparkSubmitter(cConf, locationFactory, host, runtimeContext, masterEnv);
        } else if (isLocal) {
            submitter = new LocalSparkSubmitter();
        } else {
            submitter = new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext,
                                                      options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
        }

        Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options), runtimeContext,
                                                              submitter, locationFactory, isLocal, fieldLineageWriter,
                                                              masterEnv);
        sparkRuntimeService.addListener(createRuntimeServiceListener(resources), Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);

        LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
        // Only a non-local run without security enabled is started as the configured HDFS user
        if (!isLocal && !UserGroupInformation.isSecurityEnabled()) {
            ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
        } else {
            sparkRuntimeService.start();
        }
        return controller;
    } catch (Throwable t) {
        closeAllQuietly(resources);
        throw Throwables.propagate(t);
    }
}

Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) MasterEnvironmentSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.MasterEnvironmentSparkSubmitter) MasterEnvironmentSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.MasterEnvironmentSparkSubmitter) LocalSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.LocalSparkSubmitter) SparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.SparkSubmitter) DistributedSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.DistributedSparkSubmitter) Configuration(org.apache.hadoop.conf.Configuration) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) Closeable(java.io.Closeable) DistributedSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.DistributedSparkSubmitter) DatasetFramework(io.cdap.cdap.data2.dataset2.DatasetFramework) NameMappedDatasetFramework(io.cdap.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) InstantiatorFactory(io.cdap.cdap.common.lang.InstantiatorFactory) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) ProgramType(io.cdap.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramController(io.cdap.cdap.app.runtime.ProgramController) Arguments(io.cdap.cdap.app.runtime.Arguments) MessagingService(io.cdap.cdap.messaging.MessagingService) Service(com.google.common.util.concurrent.Service) MetricsCollectionService(io.cdap.cdap.api.metrics.MetricsCollectionService) ProgramId(io.cdap.cdap.proto.id.ProgramId) BasicProgramContext(io.cdap.cdap.internal.app.runtime.BasicProgramContext) LinkedList(java.util.LinkedList) IOException(java.io.IOException) WorkflowProgramInfo(io.cdap.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) MasterEnvironment(io.cdap.cdap.master.spi.environment.MasterEnvironment) PluginInstantiator(io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator) Spark(io.cdap.cdap.api.spark.Spark) LocalSparkSubmitter(io.cdap.cdap.app.runtime.spark.submit.LocalSparkSubmitter) 
ProgramContextAware(io.cdap.cdap.data.ProgramContextAware)

Example 4 with SparkSpecification

Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.

Method createSubmitArguments of the class AbstractSparkSubmitter.

/**
 * Builds the argument list passed to {@link SparkSubmit#main(String[])}.
 *
 * <p>Note that the entries returned by {@code generateSubmitConf()} are added to {@code configs},
 * so the given map is modified. If {@code getJobFile()} returns a non-null URI, it is used in
 * place of the {@code jobFile} argument.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} of the Spark program
 * @param configs Spark configurations; each entry is emitted as a {@code --conf key=value} pair
 * @param resources resources from which the {@code --archives} and {@code --files} values are derived
 * @param jobFile the job file for Spark
 * @return the list of submit arguments
 * @throws Exception if the submit arguments cannot be created
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs, List<LocalizeResource> resources, URI jobFile) throws Exception {
    SparkSpecification sparkSpec = runtimeContext.getSparkSpecification();
    ImmutableList.Builder<String> args = ImmutableList.builder();
    Iterable<LocalizeResource> archiveResources = getArchives(resources);
    Iterable<LocalizeResource> fileResources = getFiles(resources);

    addMaster(configs, args);
    args.add("--conf").add("spark.app.name=" + sparkSpec.getName());

    // Merge in the submitter's own configurations, then emit every entry as a --conf argument
    configs.putAll(generateSubmitConf());
    configs.forEach((key, value) -> args.add("--conf").add(key + "=" + value));

    String archiveList = Joiner.on(',').join(Iterables.transform(archiveResources, RESOURCE_TO_PATH));
    String fileList = Joiner.on(',').join(Iterables.transform(fileResources, RESOURCE_TO_PATH));
    if (!Strings.isNullOrEmpty(archiveList)) {
        args.add("--archives").add(archiveList);
    }
    if (!Strings.isNullOrEmpty(fileList)) {
        args.add("--files").add(fileList);
    }

    // A non-null result from getJobFile() takes precedence over the job file passed in
    URI overridingJobFile = getJobFile();
    URI effectiveJobFile = overridingJobFile != null ? overridingJobFile : jobFile;

    boolean isPySpark = effectiveJobFile.getPath().endsWith(".py");
    if (!isPySpark) {
        args.add("--class").add(SparkMainWrapper.class.getName());
    } else {
        // Python jobs may come with extra py library files
        String pyFiles = configs.get("spark.submit.pyFiles");
        if (pyFiles != null) {
            args.add("--py-files").add(pyFiles);
        }
    }

    // Local files are passed by plain path; anything else keeps its full URI form
    args.add("file".equals(effectiveJobFile.getScheme()) ? effectiveJobFile.getPath() : effectiveJobFile.toString());

    if (!isPySpark) {
        // Extra arguments that make the program easy to identify from the command line.
        // Arguments to the user program always come from the runtime arguments.
        args.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
        args.add("--cdap.user.main.class=" + sparkSpec.getMainClassName());
    }
    return args.build();
}

Also used : ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SparkMainWrapper(io.cdap.cdap.app.runtime.spark.SparkMainWrapper) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Strings(com.google.common.base.Strings) Future(java.util.concurrent.Future) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) Predicates(com.google.common.base.Predicates) URI(java.net.URI) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Function(com.google.common.base.Function) Uninterruptibles(com.google.common.util.concurrent.Uninterruptibles) SparkRuntimeContext(io.cdap.cdap.app.runtime.spark.SparkRuntimeContext) Logger(org.slf4j.Logger) ClassLoaders(io.cdap.cdap.common.lang.ClassLoaders) SparkSubmit(org.apache.spark.deploy.SparkSubmit) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Predicate(com.google.common.base.Predicate) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) Collections(java.util.Collections) AbstractFuture(com.google.common.util.concurrent.AbstractFuture) Joiner(com.google.common.base.Joiner) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) SparkMainWrapper(io.cdap.cdap.app.runtime.spark.SparkMainWrapper) ImmutableList(com.google.common.collect.ImmutableList) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) URI(java.net.URI)

Example 5 with SparkSpecification

Use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.

Method deserialize of the class SparkSpecificationCodec.

/**
 * Reads a {@link SparkSpecification} from its JSON object form.
 *
 * <p>The {@code className}, {@code name} and {@code description} members are read unconditionally;
 * {@code mainClassName} is optional and becomes {@code null} when the member is absent.
 *
 * @param json the JSON element to read; must be a JSON object
 * @param typeOfT the requested type (not used by this implementation)
 * @param context the context used to deserialize nested values
 * @return the deserialized specification
 * @throws JsonParseException declared by the deserializer contract
 */
@Override
public SparkSpecification deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
    JsonObject root = json.getAsJsonObject();

    String specClassName = root.get("className").getAsString();
    String specName = root.get("name").getAsString();
    String specDescription = root.get("description").getAsString();
    Map<String, Plugin> pluginMap = deserializeMap(root.get("plugins"), context, Plugin.class);

    // Only read the main class name when the member exists
    String mainClass = null;
    if (root.has("mainClassName")) {
        mainClass = root.get("mainClassName").getAsString();
    }

    Set<String> datasetNames = deserializeSet(root.get("datasets"), context, String.class);
    Map<String, String> props = deserializeMap(root.get("properties"), context, String.class);

    // Resource requirements for the client, driver and executor
    Resources client = deserializeResources(root, "client", context);
    Resources driver = deserializeResources(root, "driver", context);
    Resources executor = deserializeResources(root, "executor", context);

    List<SparkHttpServiceHandlerSpecification> handlerSpecs =
        deserializeList(root.get("handlers"), context, SparkHttpServiceHandlerSpecification.class);

    return new SparkSpecification(specClassName, specName, specDescription, mainClass, datasetNames, props,
                                  client, driver, executor, handlerSpecs, pluginMap);
}

Also used : SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) JsonObject(com.google.gson.JsonObject) Resources(io.cdap.cdap.api.Resources) SparkHttpServiceHandlerSpecification(io.cdap.cdap.api.spark.SparkHttpServiceHandlerSpecification) Plugin(io.cdap.cdap.api.plugin.Plugin)

Aggregations

SparkSpecification (io.cdap.cdap.api.spark.SparkSpecification)12 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)4 ImmutableList (com.google.common.collect.ImmutableList)2 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)2 JsonObject (com.google.gson.JsonObject)2 MapReduceSpecification (io.cdap.cdap.api.mapreduce.MapReduceSpecification)2 Plugin (io.cdap.cdap.api.plugin.Plugin)2 ServiceSpecification (io.cdap.cdap.api.service.ServiceSpecification)2 Spark (io.cdap.cdap.api.spark.Spark)2 LocalizeResource (io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource)2 PluginInstantiator (io.cdap.cdap.internal.app.runtime.plugin.PluginInstantiator)2 Function (com.google.common.base.Function)1 Joiner (com.google.common.base.Joiner)1 Predicate (com.google.common.base.Predicate)1 Predicates (com.google.common.base.Predicates)1 Strings (com.google.common.base.Strings)1 Iterables (com.google.common.collect.Iterables)1 AbstractFuture (com.google.common.util.concurrent.AbstractFuture)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 Service (com.google.common.util.concurrent.Service)1