use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class DefaultAppConfigurer method addSpark.
/**
 * Registers a {@link Spark} program with this application configurer.
 *
 * <p>The program is configured through a {@code DefaultExtendedSparkConfigurer} when that class can be
 * loaded and instantiated via the SparkRunnerClassLoader; otherwise a plain {@link DefaultSparkConfigurer}
 * is used. The resulting {@link SparkSpecification} is stored under its name.
 *
 * @param spark the Spark program to add; must not be {@code null}
 */
@Override
public void addSpark(Spark spark) {
  Preconditions.checkArgument(spark != null, "Spark cannot be null.");

  // It is a bit hacky here to look for the DefaultExtendedSparkConfigurer implementation through the
  // SparkRunnerClassloader directly (CDAP-11797)
  ClassLoader runnerClassLoader = ClassLoaders.findByName(
      spark.getClass().getClassLoader(),
      "io.cdap.cdap.app.runtime.spark.classloader.SparkRunnerClassLoader");

  DefaultSparkConfigurer sparkConfigurer = null;
  if (runnerClassLoader != null) {
    try {
      Class<?> extendedConfigurerClass =
          runnerClassLoader.loadClass("io.cdap.cdap.app.deploy.spark.DefaultExtendedSparkConfigurer");
      sparkConfigurer = (DefaultSparkConfigurer) extendedConfigurerClass
          .getConstructor(
              Spark.class,
              Id.Namespace.class,
              Id.Artifact.class,
              PluginFinder.class,
              PluginInstantiator.class,
              AppDeploymentRuntimeInfo.class,
              FeatureFlagsProvider.class)
          .newInstance(
              spark,
              deployNamespace,
              artifactId,
              pluginFinder,
              pluginInstantiator,
              runtimeInfo,
              getFeatureFlagsProvider());
    } catch (Exception e) {
      // Any failure here is non-fatal: the plain DefaultSparkConfigurer below is used instead.
      LOG.trace("No DefaultExtendedSparkConfigurer found. Fallback to DefaultSparkConfigurer.", e);
    }
  }

  if (sparkConfigurer == null) {
    sparkConfigurer = new DefaultSparkConfigurer(
        spark,
        deployNamespace,
        artifactId,
        pluginFinder,
        pluginInstantiator,
        runtimeInfo,
        getFeatureFlagsProvider());
  }

  spark.configure(sparkConfigurer);
  addDatasetsAndPlugins(sparkConfigurer);

  SparkSpecification sparkSpec = sparkConfigurer.createSpecification();
  sparks.put(sparkSpec.getName(), sparkSpec);
}
use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class DataStreamsPipelineSpecGenerator method generateSpec.
/**
 * Builds the {@link DataStreamsPipelineSpec} for the given streaming pipeline configuration.
 *
 * <p>The pipeline id defaults to a freshly generated UUID. When a previously deployed application
 * specification is available and carries a pipeline spec under {@link Constants#PIPELINEID} on the
 * {@link DataStreamsSparkLauncher#NAME} Spark program, the id from that spec is reused instead. If the
 * deployed specification has no such Spark program or property, the generated id is kept.
 *
 * @param config the pipeline configuration
 * @return the generated pipeline specification
 * @throws ValidationException declared by the overridden method
 * @throws IllegalArgumentException if the batch interval cannot be parsed, or if checkpoints are enabled
 *     and the configured checkpoint directory is not a valid {@code Path}
 */
@Override
public DataStreamsPipelineSpec generateSpec(DataStreamsConfig config) throws ValidationException {
  long batchIntervalMillis;
  try {
    batchIntervalMillis = TimeParser.parseDuration(config.getBatchInterval());
  } catch (Exception e) {
    // Keep the original exception as the cause so the parse failure is not lost.
    throw new IllegalArgumentException(
        String.format("Unable to parse batchInterval '%s'", config.getBatchInterval()), e);
  }

  String pipelineId = UUID.randomUUID().toString();
  if (runtimeConfigurer != null && runtimeConfigurer.getDeployedApplicationSpec() != null) {
    SparkSpecification sparkSpec =
        runtimeConfigurer.getDeployedApplicationSpec().getSpark().get(DataStreamsSparkLauncher.NAME);
    // The map lookup yields null when the deployed app has no such Spark program, and
    // Gson returns null for a null/empty JSON string; in both cases keep the generated id.
    if (sparkSpec != null) {
      DataStreamsPipelineSpec deployedSpec =
          GSON.fromJson(sparkSpec.getProperty(Constants.PIPELINEID), DataStreamsPipelineSpec.class);
      if (deployedSpec != null) {
        pipelineId = deployedSpec.getPipelineId();
      }
    }
  }

  DataStreamsPipelineSpec.Builder specBuilder = DataStreamsPipelineSpec.builder(batchIntervalMillis, pipelineId)
      .setExtraJavaOpts(config.getExtraJavaOpts())
      .setStopGracefully(config.getStopGracefully())
      .setIsUnitTest(config.isUnitTest())
      .setCheckpointsDisabled(config.checkpointsDisabled());

  String checkpointDir = config.getCheckpointDir();
  if (!config.checkpointsDisabled() && checkpointDir != null) {
    try {
      // Constructed only to validate the directory string; the instance itself is discarded.
      new Path(checkpointDir);
    } catch (Exception e) {
      throw new IllegalArgumentException(
          String.format("Checkpoint directory '%s' is not a valid Path: %s", checkpointDir, e.getMessage()), e);
    }
    specBuilder.setCheckpointDirectory(checkpointDir);
  }

  configureStages(config, specBuilder);
  return specBuilder.build();
}
use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class SparkProgramRunner method run.
/**
 * Starts the given Spark program and returns a controller for it.
 *
 * <p>The method validates the program (application specification present, program type is
 * {@code SPARK}, a {@link SparkSpecification} exists for the program name, a host is provided),
 * builds the {@link SparkRuntimeContext}, instantiates the user {@link Spark} class, selects a
 * {@link SparkSubmitter}, and starts a {@link SparkRuntimeService}.
 *
 * <p>Closeable resources created along the way are pushed onto {@code closeables}; if anything
 * throws, they are closed via {@code closeAllQuietly} before the failure is propagated.
 *
 * @param program the Spark program to run
 * @param options the program options, including the arguments that carry the host name
 * @return a {@link ProgramController} wrapping the started runtime service
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
LOG.trace("Starting Spark program {} with SparkProgramRunner of ClassLoader {}", program.getId(), getClass().getClassLoader());
// Get the RunId first. It is used for the creation of the ClassLoader closing thread.
Arguments arguments = options.getArguments();
RunId runId = ProgramRunners.getRunId(options);
// Resources to release on failure; new entries are added at the front.
Deque<Closeable> closeables = new LinkedList<>();
try {
// Extract and verify parameters
ApplicationSpecification appSpec = program.getApplicationSpecification();
Preconditions.checkNotNull(appSpec, "Missing application specification.");
ProgramType processorType = program.getType();
Preconditions.checkNotNull(processorType, "Missing processor type.");
Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");
SparkSpecification spec = appSpec.getSpark().get(program.getName());
Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());
String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
Preconditions.checkArgument(host != null, "No hostname is provided");
// Get the WorkflowProgramInfo if it is started by Workflow
WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
// A null workflowInfo means the plain dataset framework is used; otherwise a name-mapped one is created.
DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
// Setup dataset framework context, if required
if (programDatasetFramework instanceof ProgramContextAware) {
ProgramId programId = program.getId();
((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
}
PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
// createPluginInstantiator may return null, in which case there is nothing to close.
if (pluginInstantiator != null) {
closeables.addFirst(pluginInstantiator);
}
// The last constructor argument is a no-op callback.
SparkRuntimeContext runtimeContext = new SparkRuntimeContext(new Configuration(hConf), program, options, cConf, host, txClient, programDatasetFramework, metricsCollectionService, workflowInfo, pluginInstantiator, secureStore, secureStoreManager, accessEnforcer, authenticationContext, messagingService, serviceAnnouncer, pluginFinder, locationFactory, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter, remoteClientFactory, () -> {
});
closeables.addFirst(runtimeContext);
Spark spark;
try {
spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
} catch (Exception e) {
LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
throw Throwables.propagate(e);
}
boolean isLocal = SparkRuntimeContextConfig.isLocal(options);
SparkSubmitter submitter;
// If MasterEnvironment is not available, use non-master env spark submitters
MasterEnvironment masterEnv = MasterEnvironments.getMasterEnvironment();
if (masterEnv != null && cConf.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
submitter = new MasterEnvironmentSparkSubmitter(cConf, locationFactory, host, runtimeContext, masterEnv);
} else {
submitter = isLocal ? new LocalSparkSubmitter() : new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext, options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
}
Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options), runtimeContext, submitter, locationFactory, isLocal, fieldLineageWriter, masterEnv);
// NOTE(review): the listener receives the closeables, presumably to release them when the service ends - confirm in createRuntimeServiceListener.
sparkRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);
LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
// Start directly in local mode or when Hadoop security is enabled; otherwise start as the configured HDFS user.
if (isLocal || UserGroupInformation.isSecurityEnabled()) {
sparkRuntimeService.start();
} else {
ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
}
return controller;
} catch (Throwable t) {
// Release everything registered so far before propagating the failure.
closeAllQuietly(closeables);
throw Throwables.propagate(t);
}
}
use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class AbstractSparkSubmitter method createSubmitArguments.
/**
 * Builds the argument list passed to {@link SparkSubmit#main(String[])}.
 *
 * <p>Arguments are emitted in this order: master settings, {@code spark.app.name}, one {@code --conf}
 * pair per configuration entry, optional {@code --archives} and {@code --files}, then either
 * {@code --py-files} (for a {@code .py} job file) or {@code --class}, the job file location, and, for
 * non-Python jobs, the CDAP program identification arguments.
 *
 * <p>Note that the entries returned by {@code generateSubmitConf()} are added to the given
 * {@code configs} map.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the spark program
 * @param configs set of Spark configurations
 * @param resources list of resources that needs to be localized to Spark containers
 * @param jobFile the job file for Spark; superseded by {@code getJobFile()} when that returns non-null
 * @return a list of arguments
 * @throws Exception if there is error while creating submit arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs, List<LocalizeResource> resources, URI jobFile) throws Exception {
  SparkSpecification sparkSpec = runtimeContext.getSparkSpecification();
  ImmutableList.Builder<String> args = ImmutableList.builder();
  Iterable<LocalizeResource> archiveResources = getArchives(resources);
  Iterable<LocalizeResource> fileResources = getFiles(resources);

  addMaster(configs, args);
  args.add("--conf");
  args.add("spark.app.name=" + sparkSpec.getName());

  configs.putAll(generateSubmitConf());
  configs.forEach((key, value) -> {
    args.add("--conf");
    args.add(key + "=" + value);
  });

  String archivePaths = Joiner.on(',').join(Iterables.transform(archiveResources, RESOURCE_TO_PATH));
  String filePaths = Joiner.on(',').join(Iterables.transform(fileResources, RESOURCE_TO_PATH));
  if (!Strings.isNullOrEmpty(archivePaths)) {
    args.add("--archives");
    args.add(archivePaths);
  }
  if (!Strings.isNullOrEmpty(filePaths)) {
    args.add("--files");
    args.add(filePaths);
  }

  // A job file supplied by getJobFile() takes precedence over the one passed in.
  URI overridingJobFile = getJobFile();
  URI effectiveJobFile = overridingJobFile != null ? overridingJobFile : jobFile;

  boolean pythonJob = effectiveJobFile.getPath().endsWith(".py");
  String jobLocation = "file".equals(effectiveJobFile.getScheme())
      ? effectiveJobFile.getPath()
      : effectiveJobFile.toString();

  if (pythonJob) {
    // For python, add extra py library files
    String pyLibraries = configs.get("spark.submit.pyFiles");
    if (pyLibraries != null) {
      args.add("--py-files");
      args.add(pyLibraries);
    }
    args.add(jobLocation);
  } else {
    args.add("--class");
    args.add(SparkMainWrapper.class.getName());
    args.add(jobLocation);
    // Add extra arguments for easily identifying the program from command line.
    // Arguments to user program is always coming from the runtime arguments.
    args.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
    args.add("--cdap.user.main.class=" + sparkSpec.getMainClassName());
  }

  return args.build();
}
use of io.cdap.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class SparkSpecificationCodec method deserialize.
/**
 * Reconstructs a {@link SparkSpecification} from its JSON object form.
 *
 * <p>The {@code className}, {@code name} and {@code description} members are read as strings;
 * {@code mainClassName} is optional and becomes {@code null} when the member is absent. Plugins,
 * datasets, properties, handlers and the client/driver/executor resources are decoded through the
 * codec's helper methods.
 *
 * @param json the JSON element to decode; read as a JSON object
 * @param typeOfT the target type (unused here)
 * @param context the Gson context handed to the helper methods for nested values
 * @return the decoded specification
 * @throws JsonParseException declared by the deserializer contract
 */
@Override
public SparkSpecification deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
  JsonObject root = json.getAsJsonObject();

  String specClassName = root.get("className").getAsString();
  String specName = root.get("name").getAsString();
  String specDescription = root.get("description").getAsString();
  Map<String, Plugin> pluginMap = deserializeMap(root.get("plugins"), context, Plugin.class);

  // mainClassName is optional in the serialized form.
  String mainClass = null;
  if (root.has("mainClassName")) {
    mainClass = root.get("mainClassName").getAsString();
  }

  Set<String> datasetNames = deserializeSet(root.get("datasets"), context, String.class);
  Map<String, String> specProperties = deserializeMap(root.get("properties"), context, String.class);

  Resources client = deserializeResources(root, "client", context);
  Resources driver = deserializeResources(root, "driver", context);
  Resources executor = deserializeResources(root, "executor", context);

  List<SparkHttpServiceHandlerSpecification> handlerSpecs =
      deserializeList(root.get("handlers"), context, SparkHttpServiceHandlerSpecification.class);

  return new SparkSpecification(
      specClassName,
      specName,
      specDescription,
      mainClass,
      datasetNames,
      specProperties,
      client,
      driver,
      executor,
      handlerSpecs,
      pluginMap);
}
Aggregations