Use of io.cdap.cdap.app.runtime.spark.classloader.SparkRunnerClassLoader in project cdap by caskdata: class SparkProgramRuntimeProvider, method createClassLoader.
/**
 * Creates a new {@link SparkRunnerClassLoader} for executing a Spark program.
 *
 * @param filterScalaClasses if {@code true}, Scala classes are hidden from the parent classloader
 * @param rewriteYarnClient whether the resulting classloader should rewrite the YarnClient class
 * @param rewriteCheckpointTempName whether the resulting classloader should rewrite the
 *                                  Spark streaming checkpoint temp file name
 * @return a freshly created {@link SparkRunnerClassLoader}
 * @throws IOException if the set of classpath URLs for the classloader cannot be determined
 */
private synchronized SparkRunnerClassLoader createClassLoader(boolean filterScalaClasses, boolean rewriteYarnClient, boolean rewriteCheckpointTempName) throws IOException {
  // Build the parent classloader chain, optionally hiding Scala classes from it.
  FilterClassLoader filteredParent = new FilterClassLoader(getClass().getClassLoader(), createClassFilter());
  ClassLoader parent;
  if (filterScalaClasses) {
    parent = new ScalaFilterClassLoader(filteredParent);
  } else {
    parent = filteredParent;
  }
  // Lazily compute and cache the classpath URLs; safe because this method is synchronized.
  if (classLoaderUrls == null) {
    classLoaderUrls = getSparkClassloaderURLs(getClass().getClassLoader());
  }
  return new SparkRunnerClassLoader(classLoaderUrls, parent, rewriteYarnClient, rewriteCheckpointTempName);
}
Use of io.cdap.cdap.app.runtime.spark.classloader.SparkRunnerClassLoader in project cdap by caskdata: class SparkProgramRuntimeProvider, method createProgramRunner.
/**
 * Creates a {@link ProgramRunner} for executing a Spark program in the given {@link Mode}.
 *
 * @param type the program type; must be {@link ProgramType#SPARK}
 * @param mode whether the runner executes locally or launches a distributed (twill) container
 * @param injector the injector used to resolve configuration and to build the runner injector
 * @return a new {@link ProgramRunner} appropriate for the given mode
 * @throws IllegalArgumentException if {@code type} is not SPARK or {@code mode} is unsupported
 * @throws RuntimeException if the Spark runner classloader cannot be created
 */
@Override
public ProgramRunner createProgramRunner(ProgramType type, Mode mode, Injector injector) {
  Preconditions.checkArgument(type == ProgramType.SPARK, "Unsupported program type %s. Only %s is supported", type, ProgramType.SPARK);
  CConfiguration conf = injector.getInstance(CConfiguration.class);
  boolean rewriteCheckpointTempFileName = conf.getBoolean(SparkRuntimeUtils.SPARK_STREAMING_CHECKPOINT_REWRITE_ENABLED);
  switch (mode) {
    case LOCAL: {
      // Rewrite YarnClient based on config. The LOCAL runner is used in both SDK and distributed mode
      // The actual mode that Spark is running is determined by the cdap.spark.cluster.mode attribute
      // in the hConf
      boolean rewriteYarnClient = conf.getBoolean(Constants.AppFabric.SPARK_YARN_CLIENT_REWRITE);
      try {
        SparkRunnerClassLoader classLoader = createClassLoader(filterScalaClasses, rewriteYarnClient, rewriteCheckpointTempFileName);
        try {
          // TODO: CDAP-5506 to refactor the program runtime architecture to remove the need of this assumption
          return createSparkProgramRunner(createRunnerInjector(injector, classLoader), SparkProgramRunner.class.getName(), classLoader);
        } catch (Throwable t) {
          // If there is any exception, close the classloader to avoid leaking it
          Closeables.closeQuietly(classLoader);
          throw t;
        }
      } catch (IOException e) {
        // Throwables.propagate is deprecated in Guava; wrapping in RuntimeException is its
        // exact behavior for a checked exception.
        throw new RuntimeException(e);
      }
    }
    case DISTRIBUTED: {
      // The distributed program runner is only used by the CDAP master to launch the twill container
      // hence it doesn't need to do any class rewrite.
      // We only create the SparkRunnerClassLoader once and keep reusing it since in the CDAP master, there is
      // no SparkContext being created, hence no need to provide runtime isolation.
      // This also limits the amount of permgen usage to be constant in the CDAP master regardless of how
      // many Spark programs are running. We never need to close the SparkRunnerClassLoader until process shutdown.
      ClassLoader classLoader = getDistributedRunnerClassLoader(rewriteCheckpointTempFileName);
      return createSparkProgramRunner(createRunnerInjector(injector, classLoader), DistributedSparkProgramRunner.class.getName(), classLoader);
    }
    default:
      throw new IllegalArgumentException("Unsupported Spark execution mode " + mode);
  }
}
Aggregations