Use of io.cdap.cdap.app.runtime.spark.classloader.SparkContainerClassLoader in project cdap by caskdata:
the launch method of the class SparkContainerLauncher.
/**
 * Launches the given main class. The main class will be loaded through the {@link SparkContainerClassLoader}.
 *
 * @param mainClassName the main class to launch
 * @param args arguments for the main class
 * @param removeMainClass whether to remove the jar for the main class from the classloader
 * @param masterEnvName name of the MasterEnvironment used to submit the Spark job. This will be used to set up
 *   bindings for service discovery and other CDAP capabilities. If null, the default Hadoop implementations
 *   will be used.
 */
public static void launch(String mainClassName, String[] args, boolean removeMainClass, @Nullable String masterEnvName) throws Exception {
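  // Make sure uncaught exceptions from any thread are logged (the handler logs them at DEBUG
  // level; see the ERROR logging note near the end of this method).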
  Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
  ClassLoader systemClassLoader = ClassLoader.getSystemClassLoader();
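  // Collect every URL on the system classpath; the LinkedHashSet preserves the original classpath order.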
  Set<URL> urls = ClassLoaders.getClassLoaderURLs(systemClassLoader, new LinkedHashSet<URL>());
  // Remove the URL that contains the given main class: a class with the same name is generated
  // to intercept the main() method call from the container launch script, and its jar must be
  // removed so that the real main class gets loaded (see the hypothetical example after this method).
  if (removeMainClass) {
    urls.remove(getURLByClass(systemClassLoader, mainClassName));
  }
  // Remove the first scala jar from the classpath. This ensures the one from Spark is used for Spark.
  removeNonSparkJar(systemClassLoader, "scala.language", urls);
  // Remove the first jar containing LZ4BlockInputStream from the classpath, since the one from
  // Kafka is not compatible with Spark (a sketch of such a helper follows the method below).
  removeNonSparkJar(systemClassLoader, "net.jpountz.lz4.LZ4BlockInputStream", urls);
  // First create a FilterClassLoader that only loads JVM and Kafka classes from the system classloader.
  // This isolates the scala library from the children classloaders.
  ClassLoader parentClassLoader = new FilterClassLoader(systemClassLoader, KAFKA_FILTER);
  boolean rewriteCheckpointTempFileName =
    Boolean.parseBoolean(System.getProperty(SparkRuntimeUtils.STREAMING_CHECKPOINT_REWRITE_ENABLED, "false"));
  // Create the SparkContainerClassLoader for class rewriting; it will be used for the rest of the execution.
  // Use the filtering classloader above as the parent instead of the system classloader, because the Spark
  // classes live in the system classloader and must be loaded (and rewritten) by this classloader instead.
  ClassLoader classLoader = new SparkContainerClassLoader(urls.toArray(new URL[0]), parentClassLoader,
                                                          rewriteCheckpointTempFileName);
  // Set the context classloader and launch the actual Spark main class.
  Thread.currentThread().setContextClassLoader(classLoader);
  // Create the SLF4J logger from the context classloader. It has to be created from that classloader
  // so that logs emitted by this class end up in the same logging context as the one used by Spark.
  Object logger = createLogger(classLoader);
  // Install the JUL to SLF4J bridge.
  try {
    classLoader.loadClass(SLF4JBridgeHandler.class.getName()).getDeclaredMethod("install").invoke(null);
  } catch (Exception e) {
    // Log the error and continue.
    log(logger, "warn", "Failed to invoke SLF4JBridgeHandler.install() required for jul-to-slf4j bridge", e);
  }
  // Get the SparkRuntimeContext to initialize all necessary services and the logging context.
  // This has to be done through reflection using the SparkContainerClassLoader.
  Class<?> sparkRuntimeContextProviderClass = classLoader.loadClass(SparkRuntimeContextProvider.class.getName());
  if (masterEnvName != null) {
    sparkRuntimeContextProviderClass.getMethod("setMasterEnvName", String.class).invoke(null, masterEnvName);
  }
  Object sparkRuntimeContext = sparkRuntimeContextProviderClass.getMethod("get").invoke(null);
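  // Assumption: SparkRuntimeSecurityManager closes the given Closeable before the JVM terminates
  // (e.g. on System.exit), so the runtime context is released even if the Spark program exits directly.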
  if (sparkRuntimeContext instanceof Closeable) {
    System.setSecurityManager(new SparkRuntimeSecurityManager((Closeable) sparkRuntimeContext));
  }
  try {
    // For non-PySpark programs, redirect stdout/stderr to the logger. For PySpark this is done
    // in the PythonRunner/PythonWorkerFactory via SparkClassRewriter.
    if (!isPySpark()) {
      // Invoke StandardOutErrorRedirector.redirectToLogger()
      classLoader.loadClass(StandardOutErrorRedirector.class.getName())
        .getDeclaredMethod("redirectToLogger", String.class).invoke(null, mainClassName);
    }
    // The spark.executorEnv.CDAP_LOG_DIR property may have been expanded with the driver's log
    // directory, which causes executor logs to attempt to write to the driver's log directory.
    // Reset it to the literal <LOG_DIR> placeholder so each container expands it locally.
    if (System.getProperty("spark.executorEnv.CDAP_LOG_DIR") != null) {
      System.setProperty("spark.executorEnv.CDAP_LOG_DIR", "<LOG_DIR>");
    }
    // Optionally starts the Py4j Gateway server in the executor container.
    Runnable stopGatewayServer = startGatewayServerIfNeeded(classLoader, logger);
    try {
      log(logger, "info", "Launch main class {}.main({})", mainClassName, Arrays.toString(args));
      classLoader.loadClass(mainClassName).getMethod("main", String[].class).invoke(null, new Object[] { args });
      log(logger, "info", "Main method returned {}", mainClassName);
    } finally {
      stopGatewayServer.run();
    }
  } catch (Throwable t) {
    // Log the exception, since it will be propagated back to the JVM and kill the main thread
    // (and hence the JVM process). If it isn't logged here at ERROR, it would only be logged
    // by the UncaughtExceptionHandler at DEBUG level.
    log(logger, "error", "Exception raised when calling {}.main(String[]) method", mainClassName, t);
    throw t;
  } finally {
    if (sparkRuntimeContext instanceof Closeable) {
      Closeables.closeQuietly((Closeable) sparkRuntimeContext);
    }
  }
}
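For reference, here is a minimal sketch of how helpers like getURLByClass and removeNonSparkJar could work. It is an illustration under assumptions, not the actual cdap implementation: the "spark-" jar-name prefix check and the getJarURL helper are invented here, and the real code may resolve URLs differently. It needs java.io.File, java.net.MalformedURLException, java.net.URL and java.util.Set.

// Sketch: resolve the classpath URL (typically a jar) that provides the given class.
private static URL getURLByClass(ClassLoader classLoader, String className) {
  URL resource = classLoader.getResource(className.replace('.', '/') + ".class");
  if (resource == null) {
    throw new IllegalStateException("Failed to find .class resource for " + className);
  }
  return getJarURL(resource);
}

// Sketch: remove the first jar that provides the given class from the URL set, unless it
// looks like a Spark jar. The "spark-" name prefix below is an assumption.
private static void removeNonSparkJar(ClassLoader classLoader, String className, Set<URL> urls) {
  URL resource = classLoader.getResource(className.replace('.', '/') + ".class");
  if (resource == null) {
    return; // class not on the classpath, nothing to remove
  }
  URL jarURL = getJarURL(resource);
  if (jarURL != null && !new File(jarURL.getPath()).getName().startsWith("spark-")) {
    urls.remove(jarURL);
  }
}

// Hypothetical helper: extract the enclosing jar URL from a resource URL of the form
// "jar:file:/path/to.jar!/pkg/Cls.class".
private static URL getJarURL(URL resource) {
  String path = resource.getPath();
  int idx = path.indexOf("!/");
  try {
    return idx >= 0 ? new URL(path.substring(0, idx)) : null;
  } catch (MalformedURLException e) {
    throw new IllegalStateException(e);
  }
}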
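To show how launch is typically reached, here is a hypothetical interceptor class of the kind described in the comments above: a generated class carrying the same name as the real main class, whose main() delegates to the launcher. The choice of org.apache.spark.deploy.SparkSubmit as the intercepted class and the null masterEnvName are assumptions for illustration.

package org.apache.spark.deploy;

// Hypothetical generated interceptor, named after the real main class it shadows.
public class SparkSubmit {
  public static void main(String[] args) throws Exception {
    // removeMainClass = true removes the jar containing this generated class, so loading
    // "org.apache.spark.deploy.SparkSubmit" through the SparkContainerClassLoader resolves
    // to the real Spark class rather than to this interceptor.
    SparkContainerLauncher.launch("org.apache.spark.deploy.SparkSubmit", args, true, null);
  }
}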