use of co.cask.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class AbstractSparkSubmitter method submit.
/**
 * Submits the Spark program asynchronously on a dedicated, single-use thread.
 *
 * <p>The returned future is completed with {@code result} once the blocking
 * {@code submit(runtimeContext, submitArgs)} call and {@code onCompleted(true)} both return normally.
 * It is completed exceptionally if any step fails. Cancelling the future calls
 * {@link #triggerShutdown()} and then blocks until the submitting thread has finished.
 *
 * <p>The future is always completed and the completion latch is always released, including when
 * {@code beforeSubmit()} or {@code onCompleted(false)} throws. Otherwise a caller waiting on the
 * future, or a thread cancelling it, would block forever.
 *
 * @param runtimeContext the runtime context of the Spark program being submitted
 * @param configs Spark configurations used for building the submit arguments
 * @param resources resources used for building the submit arguments
 * @param jobJar the job jar file used for building the submit arguments
 * @param result the value to set into the returned future on successful completion
 * @param <V> type of the result
 * @return a future representing the completion of the Spark submission
 */
@Override
public final <V> ListenableFuture<V> submit(final SparkRuntimeContext runtimeContext, Map<String, String> configs,
                                            List<LocalizeResource> resources, File jobJar, final V result) {
  final SparkSpecification spec = runtimeContext.getSparkSpecification();
  final List<String> args = createSubmitArguments(spec, configs, resources, jobJar);

  // Spark submit is called from this executor
  // Use an executor to simplify logic that is needed to interrupt the running thread on stopping
  final ExecutorService executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
    @Override
    public Thread newThread(Runnable r) {
      return new Thread(r, "spark-submitter-" + spec.getName() + "-" + runtimeContext.getRunId());
    }
  });

  // Latch for the Spark job completion
  final CountDownLatch completion = new CountDownLatch(1);
  final SparkJobFuture<V> resultFuture = new SparkJobFuture<V>(runtimeContext) {
    @Override
    protected void cancelTask() {
      // Try to shutdown the running spark job.
      triggerShutdown();
      // Wait for the Spark-Submit returns
      Uninterruptibles.awaitUninterruptibly(completion);
    }
  };

  // Submit the Spark job
  executor.submit(new Runnable() {
    @Override
    public void run() {
      // The outer try/finally guarantees the latch is released on every exit path,
      // so cancelTask() can never block forever.
      try {
        List<String> extraArgs;
        try {
          extraArgs = beforeSubmit();
        } catch (Throwable t) {
          // Nothing was submitted, so onCompleted is not invoked (same as before);
          // but the failure must reach the caller instead of being lost in the executor's Future.
          resultFuture.setException(t);
          return;
        }

        try {
          String[] submitArgs = Iterables.toArray(Iterables.concat(args, extraArgs), String.class);
          submit(runtimeContext, submitArgs);
          onCompleted(true);
          resultFuture.set(result);
        } catch (Throwable t) {
          try {
            onCompleted(false);
          } catch (Throwable completionFailure) {
            // Keep the original failure as the primary cause; don't let the callback failure
            // prevent the future from being completed.
            if (completionFailure != t) {
              t.addSuppressed(completionFailure);
            }
          }
          resultFuture.setException(t);
        }
      } finally {
        completion.countDown();
      }
    }
  });

  // Shutdown the executor right after submit since the thread is only used for one submission.
  executor.shutdown();
  return resultFuture;
}
use of co.cask.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class SparkProgramRunner method run.
/**
 * Builds the runtime context, submitter and runtime service for a Spark program, starts the
 * service and returns a controller for it.
 *
 * <p>Every closeable resource created along the way is tracked; if any step fails, all tracked
 * resources are closed quietly before the failure is propagated.
 *
 * @param program the program to run; must be of type {@link ProgramType#SPARK}
 * @param options the program options, which must carry the host name
 * @return a {@link ProgramController} for the started Spark runtime service
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
  // Get the RunId first. It is used for the creation of the ClassLoader closing thread.
  Arguments arguments = options.getArguments();
  RunId runId = ProgramRunners.getRunId(options);

  Deque<Closeable> resourcesToClose = new LinkedList<>();
  try {
    // Validate the inputs before creating anything that needs cleanup
    ApplicationSpecification applicationSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(applicationSpec, "Missing application specification.");

    ProgramType programType = program.getType();
    Preconditions.checkNotNull(programType, "Missing processor type.");
    Preconditions.checkArgument(programType == ProgramType.SPARK, "Only Spark process type is supported.");

    SparkSpecification sparkSpec = applicationSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(sparkSpec, "Missing SparkSpecification for %s", program.getName());

    String hostname = options.getArguments().getOption(ProgramOptionConstants.HOST);
    Preconditions.checkArgument(hostname != null, "No hostname is provided");

    // A non-null WorkflowProgramInfo means the program was started by a Workflow
    WorkflowProgramInfo workflowProgramInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework effectiveDatasetFramework;
    if (workflowProgramInfo == null) {
      effectiveDatasetFramework = datasetFramework;
    } else {
      effectiveDatasetFramework = NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework,
                                                                                           workflowProgramInfo,
                                                                                           applicationSpec);
    }

    // Give the dataset framework the program run context, if it accepts one
    if (effectiveDatasetFramework instanceof ProgramContextAware) {
      ProgramContextAware contextAware = (ProgramContextAware) effectiveDatasetFramework;
      contextAware.setContext(new BasicProgramContext(program.getId().run(runId)));
    }

    PluginInstantiator instantiator = createPluginInstantiator(options, program.getClassLoader());
    if (instantiator != null) {
      resourcesToClose.addFirst(instantiator);
    }

    SparkRuntimeContext sparkContext = new SparkRuntimeContext(new Configuration(hConf), program, options, cConf,
                                                               hostname, txClient, effectiveDatasetFramework,
                                                               discoveryServiceClient, metricsCollectionService,
                                                               streamAdmin, workflowProgramInfo, instantiator,
                                                               secureStore, secureStoreManager,
                                                               authorizationEnforcer, authenticationContext,
                                                               messagingService, serviceAnnouncer, pluginFinder,
                                                               locationFactory);
    resourcesToClose.addFirst(sparkContext);

    Spark sparkInstance;
    try {
      sparkInstance = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", sparkSpec.getClassName(), e);
      throw Throwables.propagate(e);
    }

    // Pick the submitter matching the execution mode
    SparkSubmitter sparkSubmitter;
    if (SparkRuntimeContextConfig.isLocal(hConf)) {
      sparkSubmitter = new LocalSparkSubmitter();
    } else {
      sparkSubmitter = new DistributedSparkSubmitter(
        hConf, locationFactory, hostname, sparkContext,
        options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE));
    }

    Service runtimeService = new SparkRuntimeService(cConf, sparkInstance, getPluginArchive(options),
                                                     sparkContext, sparkSubmitter, locationFactory);
    runtimeService.addListener(createRuntimeServiceListener(resourcesToClose), Threads.SAME_THREAD_EXECUTOR);

    ProgramController programController = new SparkProgramController(runtimeService, sparkContext);

    LOG.debug("Starting Spark Job. Context: {}", sparkContext);
    // Only a non-local run without security enabled is started as the configured HDFS user
    if (!SparkRuntimeContextConfig.isLocal(hConf) && !UserGroupInformation.isSecurityEnabled()) {
      ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), runtimeService);
    } else {
      runtimeService.start();
    }
    return programController;
  } catch (Throwable t) {
    closeAllQuietly(resourcesToClose);
    throw Throwables.propagate(t);
  }
}
use of co.cask.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class SparkRuntimeContext method getSparkSpecification.
/**
 * Looks up the {@link SparkSpecification} registered under the program's name in its
 * application specification.
 *
 * @param program the program whose Spark specification is wanted
 * @return the specification; never {@code null}
 * @throws IllegalStateException if the application specification has no Spark entry for the program
 */
private static SparkSpecification getSparkSpecification(Program program) {
  SparkSpecification sparkSpec = program.getApplicationSpecification()
    .getSpark()
    .get(program.getName());

  // A missing spec is not expected here, otherwise the spark program won't even get started
  boolean specFound = sparkSpec != null;
  Preconditions.checkState(specFound, "SparkSpecification not found for %s", program.getId());
  return sparkSpec;
}
use of co.cask.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class AbstractSparkSubmitter method createSubmitArguments.
/**
 * Builds the argument list passed to {@link SparkSubmit#main(String[])}.
 *
 * <p>The arguments are emitted in this order: master settings, the {@code spark.app.name} conf,
 * one {@code --conf} pair per entry in {@code configs} and then per entry in the submit conf,
 * optional {@code --archives} and {@code --files}, then either {@code --py-files} (for a
 * {@code .py} job file) or {@code --class}, the job file itself, and finally, for non-Python jobs,
 * two extra arguments identifying the program.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the spark program
 * @param configs Spark configurations, each added as a {@code --conf key=value} pair
 * @param resources list of resources that needs to be localized to Spark containers
 * @param jobFile the job file for Spark
 * @return an immutable list of arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs,
                                           List<LocalizeResource> resources, URI jobFile) {
  SparkSpecification sparkSpec = runtimeContext.getSparkSpecification();
  ImmutableList.Builder<String> submitArgs = ImmutableList.builder();

  addMaster(configs, submitArgs);
  submitArgs.add("--conf");
  submitArgs.add("spark.app.name=" + sparkSpec.getName());

  BiConsumer<String, String> addConf = (key, value) -> {
    submitArgs.add("--conf");
    submitArgs.add(key + "=" + value);
  };
  configs.forEach(addConf);
  getSubmitConf().forEach(addConf);

  // Split the resources into archives and plain files, each as a comma separated list of paths
  Joiner commaJoiner = Joiner.on(',');
  String archivePaths = commaJoiner.join(
    Iterables.transform(Iterables.filter(resources, ARCHIVE_FILTER), RESOURCE_TO_PATH));
  String filePaths = commaJoiner.join(
    Iterables.transform(Iterables.filter(resources, Predicates.not(ARCHIVE_FILTER)), RESOURCE_TO_PATH));

  if (!archivePaths.isEmpty()) {
    submitArgs.add("--archives");
    submitArgs.add(archivePaths);
  }
  if (!filePaths.isEmpty()) {
    submitArgs.add("--files");
    submitArgs.add(filePaths);
  }

  boolean pythonJob = jobFile.getPath().endsWith(".py");
  // A "file" URI is passed as a plain path; any other scheme is passed as the full URI
  String jobFileArg = "file".equals(jobFile.getScheme()) ? jobFile.getPath() : jobFile.toString();

  if (pythonJob) {
    // For python, add extra py library files
    String pythonLibs = configs.get("spark.submit.pyFiles");
    if (pythonLibs != null) {
      submitArgs.add("--py-files");
      submitArgs.add(pythonLibs);
    }
    submitArgs.add(jobFileArg);
    return submitArgs.build();
  }

  submitArgs.add("--class");
  submitArgs.add(SparkMainWrapper.class.getName());
  submitArgs.add(jobFileArg);

  // Add extra arguments for easily identifying the program from command line.
  // Arguments to user program is always coming from the runtime arguments.
  submitArgs.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
  submitArgs.add("--cdap.user.main.class=" + sparkSpec.getMainClassName());
  return submitArgs.build();
}
use of co.cask.cdap.api.spark.SparkSpecification in project cdap by caskdata.
the class DefaultSparkConfigurer method createSpecification.
/**
 * Creates the {@link SparkSpecification} from the state held by this configurer.
 *
 * <p>Before building the specification, the fields of the Spark instance are visited with a
 * {@link PropertyFieldExtractor} writing into {@code properties} and a
 * {@link DataSetFieldExtractor} writing into a fresh set of dataset names.
 *
 * @return a new {@link SparkSpecification}
 */
public SparkSpecification createSpecification() {
  Set<String> datasetNames = new HashSet<>();

  // Grab all @Property and @Dataset fields
  PropertyFieldExtractor propertyExtractor = new PropertyFieldExtractor(properties);
  DataSetFieldExtractor datasetExtractor = new DataSetFieldExtractor(datasetNames);
  Reflections.visit(spark, spark.getClass(), propertyExtractor, datasetExtractor);

  String sparkClassName = spark.getClass().getName();
  return new SparkSpecification(sparkClassName, name, description, mainClassName, datasetNames, properties,
                                clientResources, driverResources, executorResources, getHandlers());
}
Aggregations