Use of io.cdap.cdap.common.lang.FilterClassLoader in project cdap by caskdata.
The class AccessControllerClassLoader, method createParent.
@VisibleForTesting
static ClassLoader createParent() {
  ClassLoader baseClassLoader = AccessControllerClassLoader.class.getClassLoader();
  final Set<String> accessControllerResources = traceSecurityDependencies(baseClassLoader);

  // By default, FilterClassLoader's defaultFilter allows all Hadoop classes, which lets the
  // access controller extension share the same instance of UserGroupInformation. This allows Kerberos
  // credential renewal to also renew for any extension.
  final FilterClassLoader.Filter defaultFilter = FilterClassLoader.defaultFilter();

  return new FilterClassLoader(baseClassLoader, new FilterClassLoader.Filter() {
    @Override
    public boolean acceptResource(String resource) {
      return defaultFilter.acceptResource(resource) || accessControllerResources.contains(resource);
    }

    @Override
    public boolean acceptPackage(String packageName) {
      return true;
    }
  });
}
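As a minimal sketch of the same Filter contract used above, a caller could restrict a classloader to an explicit allow-list of resources. The resource name and allow-list below are illustrative assumptions, not CDAP code.

// Hedged sketch: a FilterClassLoader that only exposes an explicit allow-list of resources.
// "com/example/security/MyAccessController.class" is a hypothetical resource name.
Set<String> allowedResources = Collections.singleton("com/example/security/MyAccessController.class");
FilterClassLoader.Filter allowListFilter = new FilterClassLoader.Filter() {
  @Override
  public boolean acceptResource(String resource) {
    return allowedResources.contains(resource);
  }

  @Override
  public boolean acceptPackage(String packageName) {
    return true;
  }
};
ClassLoader restricted = new FilterClassLoader(getClass().getClassLoader(), allowListFilter);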
Use of io.cdap.cdap.common.lang.FilterClassLoader in project cdap by caskdata.
The class SparkProgramRuntimeProvider, method createClassLoader.
/**
 * Creates a {@link SparkRunnerClassLoader} whose parent optionally filters out Scala classes,
 * with flags controlling rewriting of the YARN client and of streaming checkpoint temp file names.
 */
private synchronized SparkRunnerClassLoader createClassLoader(boolean filterScalaClasses,
                                                              boolean rewriteYarnClient,
                                                              boolean rewriteCheckpointTempName) throws IOException {
  // Determine whether Scala classes need to be filtered out of the parent classloader.
  FilterClassLoader filteredBaseParent = new FilterClassLoader(getClass().getClassLoader(), createClassFilter());
  ClassLoader runnerParentClassLoader = filterScalaClasses
    ? new ScalaFilterClassLoader(filteredBaseParent) : filteredBaseParent;

  if (classLoaderUrls == null) {
    classLoaderUrls = getSparkClassloaderURLs(getClass().getClassLoader());
  }

  return new SparkRunnerClassLoader(classLoaderUrls, runnerParentClassLoader,
                                    rewriteYarnClient, rewriteCheckpointTempName);
}
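A hedged usage sketch for the classloader created above; the save/restore of the context classloader is an assumption for illustration and is not taken from SparkProgramRuntimeProvider.

// Assumed usage: install the rewriting classloader as the context classloader while Spark classes are loaded.
SparkRunnerClassLoader runnerClassLoader = createClassLoader(true, false, false);
ClassLoader oldContextClassLoader = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader(runnerClassLoader);
try {
  // ... load and run Spark classes through runnerClassLoader ...
} finally {
  Thread.currentThread().setContextClassLoader(oldContextClassLoader);
}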
Use of io.cdap.cdap.common.lang.FilterClassLoader in project cdap by caskdata.
The class MainClassLoader, method createFromContext.
/**
 * @param filter A {@link FilterClassLoader.Filter} for filtering out classes from the parent classloader
 * @param extraClasspath extra list of {@link URL} to be added to the end of the classpath for the
 *   {@link MainClassLoader} to be created
 * @return a new instance created from the current context classloader or the system classloader. The returned
 *   {@link MainClassLoader} will be the defining classloader for classes in the context classloader
 *   that the filter rejected. For classes that pass the filter, the defining classloader will be the original
 *   context classloader.
 *   It will return {@code null} if it is not able to create a new instance due to lack of classpath information.
 */
@Nullable
public static MainClassLoader createFromContext(FilterClassLoader.Filter filter, URL... extraClasspath) {
  ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
  if (classLoader == null) {
    classLoader = ClassLoader.getSystemClassLoader();
  }

  List<URL> classpath = new ArrayList<>();
  if (classLoader instanceof URLClassLoader) {
    classpath.addAll(Arrays.asList(((URLClassLoader) classLoader).getURLs()));
  } else if (classLoader == ClassLoader.getSystemClassLoader()) {
    addClassPath(classpath);
  } else {
    // Not able to create a new MainClassLoader
    return null;
  }
  classpath.addAll(Arrays.asList(extraClasspath));

  // Find and move hive-exec to the end. The hive-exec jar contains a lot of conflicting classes that we don't
  // want to include during dependency tracing.
  Iterator<URL> iterator = classpath.iterator();
  List<URL> hiveExecJars = new ArrayList<>();
  while (iterator.hasNext()) {
    URL url = iterator.next();
    if (url.getPath().contains("hive-exec")) {
      iterator.remove();
      hiveExecJars.add(url);
    }
  }
  classpath.addAll(hiveExecJars);

  ClassLoader filtered = new FilterClassLoader(classLoader, filter);
  ClassLoader parent = new CombineClassLoader(classLoader.getParent(), filtered);
  return new MainClassLoader(classpath.toArray(new URL[0]), parent);
}
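A minimal usage sketch for createFromContext, assuming the default filter is acceptable and treating the null return as a hard failure; the error handling is an assumption for illustration.

// Hedged sketch: create a MainClassLoader from the current context and install it.
MainClassLoader mainClassLoader = MainClassLoader.createFromContext(FilterClassLoader.defaultFilter());
if (mainClassLoader == null) {
  // No classpath information was available from the context or system classloader.
  throw new IllegalStateException("Failed to create MainClassLoader from the context classloader");
}
Thread.currentThread().setContextClassLoader(mainClassLoader);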
Use of io.cdap.cdap.common.lang.FilterClassLoader in project cdap by caskdata.
The class SparkRuntimeContextProvider, method createProgram.
private static Program createProgram(CConfiguration cConf, SparkRuntimeContextConfig contextConfig) throws IOException {
  File programJar = new File(PROGRAM_JAR_NAME);
  File programDir = new File(PROGRAM_JAR_EXPANDED_NAME);

  ClassLoader parentClassLoader = new FilterClassLoader(SparkRuntimeContextProvider.class.getClassLoader(),
                                                        SparkResourceFilters.SPARK_PROGRAM_CLASS_LOADER_FILTER);
  ClassLoader classLoader = new ProgramClassLoader(cConf, programDir, parentClassLoader);

  return new DefaultProgram(new ProgramDescriptor(contextConfig.getProgramId(),
                                                  contextConfig.getApplicationSpecification()),
                            Locations.toLocation(programJar), classLoader);
}
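For illustration, a hedged sketch of how the resulting Program's classloader chain resolves the user's main class; the Program accessor names are assumptions based on typical CDAP usage and are not shown in the snippet above.

// Assumed usage: classes of the user program resolve through the ProgramClassLoader,
// which delegates CDAP/Spark classes only through the filtering parent created above.
Program program = createProgram(cConf, contextConfig);
Class<?> programMainClass = program.getClassLoader().loadClass(program.getMainClassName());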
Use of io.cdap.cdap.common.lang.FilterClassLoader in project cdap by caskdata.
The class SparkContainerLauncher, method launch.
/**
 * Launches the given main class. The main class will be loaded through the {@link SparkContainerClassLoader}.
 *
 * @param mainClassName the main class to launch
 * @param args arguments for the main class
 * @param removeMainClass whether to remove the jar for the main class from the classloader
 * @param masterEnvName name of the MasterEnvironment used to submit the Spark job. This will be used to set up
 *   bindings for service discovery and other CDAP capabilities. If null, the default Hadoop implementations
 *   will be used.
 */
public static void launch(String mainClassName, String[] args, boolean removeMainClass,
                          @Nullable String masterEnvName) throws Exception {
  Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
  ClassLoader systemClassLoader = ClassLoader.getSystemClassLoader();
  Set<URL> urls = ClassLoaders.getClassLoaderURLs(systemClassLoader, new LinkedHashSet<URL>());

  // Optionally remove the jar for the main class, which was added by the main()
  // method call from the container launch script.
  if (removeMainClass) {
    urls.remove(getURLByClass(systemClassLoader, mainClassName));
  }

  // Remove the first Scala jar from the classpath. This ensures the one from Spark is used for Spark.
  removeNonSparkJar(systemClassLoader, "scala.language", urls);
  // Remove the first jar containing LZ4BlockInputStream from the classpath.
  // The one from Kafka is not compatible with Spark.
  removeNonSparkJar(systemClassLoader, "net.jpountz.lz4.LZ4BlockInputStream", urls);

  // First create a FilterClassLoader that only loads JVM and Kafka classes from the system classloader.
  // This is to isolate the Scala library from children.
  ClassLoader parentClassLoader = new FilterClassLoader(systemClassLoader, KAFKA_FILTER);

  boolean rewriteCheckpointTempFileName = Boolean.parseBoolean(
    System.getProperty(SparkRuntimeUtils.STREAMING_CHECKPOINT_REWRITE_ENABLED, "false"));

  // Create the SparkContainerClassLoader for class rewriting; it will be used for the rest of the execution.
  // Use the filtering classloader as the parent instead of the system classloader because
  // Spark classes are in the system classloader and we want to rewrite them.
  ClassLoader classLoader = new SparkContainerClassLoader(urls.toArray(new URL[0]), parentClassLoader,
                                                          rewriteCheckpointTempFileName);

  // Set the context classloader and launch the actual Spark main class.
  Thread.currentThread().setContextClassLoader(classLoader);

  // Create the SLF4J logger from the context classloader. It has to be created from that classloader in order
  // for logs in this class to be in the same context as the one used in Spark.
  Object logger = createLogger(classLoader);

  // Install the JUL to SLF4J bridge.
  try {
    classLoader.loadClass(SLF4JBridgeHandler.class.getName()).getDeclaredMethod("install").invoke(null);
  } catch (Exception e) {
    // Log the error and continue
    log(logger, "warn", "Failed to invoke SLF4JBridgeHandler.install() required for jul-to-slf4j bridge", e);
  }

  // Get the SparkRuntimeContext to initialize all necessary services and the logging context.
  // This needs to be done through reflection using the SparkContainerClassLoader.
  Class<?> sparkRuntimeContextProviderClass = classLoader.loadClass(SparkRuntimeContextProvider.class.getName());
  if (masterEnvName != null) {
    sparkRuntimeContextProviderClass.getMethod("setMasterEnvName", String.class).invoke(null, masterEnvName);
  }
  Object sparkRuntimeContext = sparkRuntimeContextProviderClass.getMethod("get").invoke(null);
  if (sparkRuntimeContext instanceof Closeable) {
    System.setSecurityManager(new SparkRuntimeSecurityManager((Closeable) sparkRuntimeContext));
  }

  try {
    // For PySpark, stdout/stderr redirection is handled in the PythonRunner/PythonWorkerFactory
    // via SparkClassRewriter.
    if (!isPySpark()) {
      // Invoke StandardOutErrorRedirector.redirectToLogger()
      classLoader.loadClass(StandardOutErrorRedirector.class.getName())
        .getDeclaredMethod("redirectToLogger", String.class)
        .invoke(null, mainClassName);
    }

    // Reset spark.executorEnv.CDAP_LOG_DIR back to the <LOG_DIR> placeholder if it was expanded,
    // which causes executor logs to attempt to write to the driver log directory.
    if (System.getProperty("spark.executorEnv.CDAP_LOG_DIR") != null) {
      System.setProperty("spark.executorEnv.CDAP_LOG_DIR", "<LOG_DIR>");
    }

    // Optionally start the Py4j gateway server in the executor container.
    Runnable stopGatewayServer = startGatewayServerIfNeeded(classLoader, logger);
    try {
      log(logger, "info", "Launch main class {}.main({})", mainClassName, Arrays.toString(args));
      classLoader.loadClass(mainClassName).getMethod("main", String[].class).invoke(null, new Object[] { args });
      log(logger, "info", "Main method returned {}", mainClassName);
    } finally {
      stopGatewayServer.run();
    }
  } catch (Throwable t) {
    // Log the exception since it will be propagated back to the JVM
    // and kill the main thread (hence the JVM process).
    // If we don't log it here as ERROR, it will be logged by the UncaughtExceptionHandler at DEBUG level.
    log(logger, "error", "Exception raised when calling {}.main(String[]) method", mainClassName, t);
    throw t;
  } finally {
    if (sparkRuntimeContext instanceof Closeable) {
      Closeables.closeQuietly((Closeable) sparkRuntimeContext);
    }
  }
}
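A hypothetical invocation of launch from a container wrapper main class; the main class name and flag values below are illustrative assumptions, not taken from CDAP's launch scripts.

// Hedged sketch: a wrapper main() that delegates to SparkContainerLauncher.
public static void main(String[] args) throws Exception {
  // "org.apache.spark.deploy.SparkSubmit" and the flag values are illustrative assumptions.
  SparkContainerLauncher.launch("org.apache.spark.deploy.SparkSubmit", args, false, null);
}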