Use of io.cdap.cdap.api.spark.Spark in project cdap by caskdata.
The class DefaultAppConfigurer, method addSpark.
@Override
public void addSpark(Spark spark) {
  Preconditions.checkArgument(spark != null, "Spark cannot be null.");
  DefaultSparkConfigurer configurer = null;
  // It is a bit hacky here to look for the DefaultExtendedSparkConfigurer implementation through the
  // SparkRunnerClassLoader directly (CDAP-11797)
  ClassLoader sparkRunnerClassLoader = ClassLoaders.findByName(
    spark.getClass().getClassLoader(),
    "io.cdap.cdap.app.runtime.spark.classloader.SparkRunnerClassLoader");
  if (sparkRunnerClassLoader != null) {
    try {
      configurer = (DefaultSparkConfigurer) sparkRunnerClassLoader
        .loadClass("io.cdap.cdap.app.deploy.spark.DefaultExtendedSparkConfigurer")
        .getConstructor(Spark.class, Id.Namespace.class, Id.Artifact.class, PluginFinder.class,
                        PluginInstantiator.class, AppDeploymentRuntimeInfo.class, FeatureFlagsProvider.class)
        .newInstance(spark, deployNamespace, artifactId, pluginFinder, pluginInstantiator,
                     runtimeInfo, getFeatureFlagsProvider());
    } catch (Exception e) {
      // Ignore it and the configurer will be defaulted to DefaultSparkConfigurer
      LOG.trace("No DefaultExtendedSparkConfigurer found. Fallback to DefaultSparkConfigurer.", e);
    }
  }
  if (configurer == null) {
    configurer = new DefaultSparkConfigurer(spark, deployNamespace, artifactId, pluginFinder,
                                            pluginInstantiator, runtimeInfo, getFeatureFlagsProvider());
  }
  spark.configure(configurer);
  addDatasetsAndPlugins(configurer);
  SparkSpecification spec = configurer.createSpecification();
  sparks.put(spec.getName(), spec);
}
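For context on the calling side: DefaultAppConfigurer.addSpark backs the addSpark call an application makes during configuration. The sketch below is hypothetical and assumes the standard AbstractApplication and AbstractSpark helper classes from the CDAP API; the names MySparkApp, WordCountSpark, and WordCountMain are illustrative only.

// Hypothetical application; addSpark(...) eventually reaches DefaultAppConfigurer.addSpark above.
public class MySparkApp extends AbstractApplication {
  @Override
  public void configure() {
    setName("MySparkApp");
    setDescription("Registers a single Spark program");
    addSpark(new WordCountSpark());
  }
}

// Hypothetical Spark program; its configure(...) receives the DefaultSparkConfigurer
// (or DefaultExtendedSparkConfigurer) instantiated above.
public class WordCountSpark extends AbstractSpark {
  @Override
  protected void configure() {
    setName("WordCountSpark");
    setDescription("Counts words in an input dataset");
    setMainClass(WordCountMain.class); // WordCountMain would implement JavaSparkMain
  }
}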
Use of io.cdap.cdap.api.spark.Spark in project cdap by caskdata.
The class ExternalSparkProgram, method configure.
@Override
protected void configure() {
  setClientResources(phaseSpec.getClientResources());
  setDriverResources(phaseSpec.getDriverResources());
  setExecutorResources(phaseSpec.getResources());
  // register the plugins at program level so that the program can be failed by the platform early
  // in case of plugin requirements not being met
  phaseSpec.getPhase().registerPlugins(getConfigurer(), runtimeConfigurer, deployedNamespace);
  PluginSpec pluginSpec = stageSpec.getPlugin();
  PluginProperties pluginProperties = PluginProperties.builder().addAll(pluginSpec.getProperties()).build();
  // use a UUID as plugin ID so that it doesn't clash with anything. Only using the class here to
  // check which main class is needed
  // TODO: clean this up so that we only get the class once and store it in the PluginSpec instead of getting
  // it in the pipeline spec generator and here
  Object sparkPlugin = usePlugin(pluginSpec.getType(), pluginSpec.getName(),
                                 UUID.randomUUID().toString(), pluginProperties);
  if (sparkPlugin == null) {
    // should never happen, should have been checked before by the pipeline spec generator
    throw new IllegalStateException(String.format("No plugin found of type %s and name %s for stage %s",
                                                  pluginSpec.getType(), pluginSpec.getName(), STAGE_NAME));
  }
  if (Spark.class.isAssignableFrom(sparkPlugin.getClass())) {
    // TODO: Pass in a forwarding configurer so that we can capture the properties set by the plugin.
    // However, the usage is very limited as the plugin can always use plugin config to preserve properties
    ((Spark) sparkPlugin).configure(getConfigurer());
  } else if (SparkMain.class.isAssignableFrom(sparkPlugin.getClass())) {
    setMainClass(ScalaSparkMainWrapper.class);
  } else {
    setMainClass(JavaSparkMainWrapper.class);
  }
  setName(phaseSpec.getPhaseName());
  Map<String, String> properties = new HashMap<>();
  properties.put(STAGE_NAME, stageSpec.getName());
  properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec, BatchPhaseSpec.class));
  setProperties(properties);
}
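To illustrate the branch that falls through to JavaSparkMainWrapper, here is a hedged sketch of such a plugin, assuming the JavaSparkMain interface and the @Plugin/@Name annotations from the CDAP API; the plugin type string and the class itself are assumptions, not taken from the project.

// Hypothetical plugin: it implements neither Spark nor SparkMain, so the configure() method
// above would fall through to setMainClass(JavaSparkMainWrapper.class).
@Plugin(type = "sparkprogram") // plugin type string is an assumption
@Name("SimpleDoubling")
public class SimpleDoublingProgram implements JavaSparkMain {
  @Override
  public void run(JavaSparkExecutionContext sec) throws Exception {
    JavaSparkContext jsc = new JavaSparkContext();
    // trivial job; real programs would typically read from and write to CDAP datasets through sec
    jsc.parallelize(Arrays.asList(1, 2, 3)).map(x -> x * 2).count();
  }
}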
Use of io.cdap.cdap.api.spark.Spark in project cdap by caskdata.
The class SparkProgramRunner, method run.
@Override
public ProgramController run(Program program, ProgramOptions options) {
  LOG.trace("Starting Spark program {} with SparkProgramRunner of ClassLoader {}",
            program.getId(), getClass().getClassLoader());
  // Get the RunId first. It is used for the creation of the ClassLoader closing thread.
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);
  Deque<Closeable> closeables = new LinkedList<>();
  try {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");
    SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());
    String host = options.getArguments().getOption(ProgramOptionConstants.HOST);
    Preconditions.checkArgument(host != null, "No hostname is provided");
    // Get the WorkflowProgramInfo if it is started by Workflow
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramId programId = program.getId();
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    if (pluginInstantiator != null) {
      closeables.addFirst(pluginInstantiator);
    }
    SparkRuntimeContext runtimeContext = new SparkRuntimeContext(
      new Configuration(hConf), program, options, cConf, host, txClient, programDatasetFramework,
      metricsCollectionService, workflowInfo, pluginInstantiator, secureStore, secureStoreManager,
      accessEnforcer, authenticationContext, messagingService, serviceAnnouncer, pluginFinder,
      locationFactory, metadataReader, metadataPublisher, namespaceQueryAdmin, fieldLineageWriter,
      remoteClientFactory, () -> { });
    closeables.addFirst(runtimeContext);
    Spark spark;
    try {
      spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }
    boolean isLocal = SparkRuntimeContextConfig.isLocal(options);
    SparkSubmitter submitter;
    // If MasterEnvironment is not available, use non-master env spark submitters
    MasterEnvironment masterEnv = MasterEnvironments.getMasterEnvironment();
    if (masterEnv != null && cConf.getBoolean(Constants.Environment.PROGRAM_SUBMISSION_MASTER_ENV_ENABLED, true)) {
      submitter = new MasterEnvironmentSparkSubmitter(cConf, locationFactory, host, runtimeContext, masterEnv);
    } else {
      submitter = isLocal
        ? new LocalSparkSubmitter()
        : new DistributedSparkSubmitter(hConf, locationFactory, host, runtimeContext,
                                        options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
    }
    Service sparkRuntimeService = new SparkRuntimeService(cConf, spark, getPluginArchive(options), runtimeContext,
                                                          submitter, locationFactory, isLocal, fieldLineageWriter,
                                                          masterEnv);
    sparkRuntimeService.addListener(createRuntimeServiceListener(closeables), Threads.SAME_THREAD_EXECUTOR);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, runtimeContext);
    LOG.debug("Starting Spark Job. Context: {}", runtimeContext);
    if (isLocal || UserGroupInformation.isSecurityEnabled()) {
      sparkRuntimeService.start();
    } else {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), sparkRuntimeService);
    }
    return controller;
  } catch (Throwable t) {
    closeAllQuietly(closeables);
    throw Throwables.propagate(t);
  }
}
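Note how run collects every resource it opens in the closeables deque and registers a listener so they are released when the Spark runtime service stops. The actual createRuntimeServiceListener is not shown on this page; the snippet below is only a sketch of that cleanup pattern, assuming Guava's Service.Listener and the same closeAllQuietly helper used in the catch block.

// Sketch only, not the actual CDAP implementation: close everything in the deque once the
// service stops, whether it terminated normally or failed.
private Service.Listener createRuntimeServiceListener(Deque<Closeable> closeables) {
  return new Service.Listener() {
    @Override
    public void terminated(Service.State from) {
      closeAllQuietly(closeables);
    }

    @Override
    public void failed(Service.State from, Throwable failure) {
      closeAllQuietly(closeables);
    }
  };
}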
Use of io.cdap.cdap.api.spark.Spark in project cdap by caskdata.
The class SparkRuntimeService, method destroy.
/**
 * Calls the destroy or onFinish method of {@link ProgramLifecycle}.
 */
private void destroy(final ProgramState state) {
  context.setState(state);
  TransactionControl defaultTxControl = runtimeContext.getDefaultTxControl();
  TransactionControl txControl = spark instanceof ProgramLifecycle
    ? Transactions.getTransactionControl(defaultTxControl, Spark.class, spark, "destroy")
    : defaultTxControl;
  runtimeContext.destroyProgram(programLifecycle, txControl, false);
  if (emitFieldLineage()) {
    try {
      // here we cannot call context.flushRecord() since the WorkflowNodeState will need to record and store
      // the lineage information
      FieldLineageInfo info = new FieldLineageInfo(runtimeContext.getFieldLineageOperations());
      fieldLineageWriter.write(runtimeContext.getProgramRunId(), info);
    } catch (Throwable t) {
      LOG.warn("Failed to emit the field lineage operations for Spark {}", runtimeContext.getProgramRunId(), t);
    }
  }
}
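Transactions.getTransactionControl inspects the destroy() method of the user's Spark class for a @TransactionPolicy annotation to decide how destroyProgram runs it. The following is a minimal sketch of the program side, assuming the standard AbstractSpark lifecycle and the @TransactionPolicy annotation from the CDAP API; the class is hypothetical.

// Hypothetical Spark program whose destroy() opts out of the implicit transaction,
// so the destroy(...) method above would resolve txControl to TransactionControl.EXPLICIT.
public class CleanupAwareSpark extends AbstractSpark {
  @Override
  protected void configure() {
    setName("CleanupAwareSpark");
    // setMainClass(...) and other configuration omitted for brevity
  }

  @Override
  @TransactionPolicy(TransactionControl.EXPLICIT)
  public void destroy() {
    // no dataset access here, so no transaction is needed
  }
}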