Example 1 with SparkContainerClassLoader

Use of io.cdap.cdap.app.runtime.spark.classloader.SparkContainerClassLoader in project cdap by caskdata.

The class SparkContainerLauncher, method launch:

/**
 * Launches the given main class. The main class will be loaded through the {@link SparkContainerClassLoader}.
 *
 * @param mainClassName the main class to launch
 * @param args arguments for the main class
 * @param removeMainClass whether to remove the jar for the main class from the classloader
 * @param masterEnvName name of the MasterEnvironment used to submit the Spark job. This will be used to setup
 *   bindings for service discovery and other CDAP capabilities. If null, the default Hadoop implementations will
 *   be used.
 */
public static void launch(String mainClassName, String[] args, boolean removeMainClass, @Nullable String masterEnvName) throws Exception {
    Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
    ClassLoader systemClassLoader = ClassLoader.getSystemClassLoader();
    Set<URL> urls = ClassLoaders.getClassLoaderURLs(systemClassLoader, new LinkedHashSet<URL>());
    // Optionally remove the jar containing the main class from the classpath. This is used when launch
    // is invoked through the method call from the container launch script.
    if (removeMainClass) {
        urls.remove(getURLByClass(systemClassLoader, mainClassName));
    }
    // Remove the first Scala library jar from the classpath. This ensures the one bundled with Spark is used for Spark.
    removeNonSparkJar(systemClassLoader, "scala.language", urls);
    // Remove the first jar containing LZ4BlockInputStream from the classpath.
    // The one from Kafka is not compatible with Spark.
    removeNonSparkJar(systemClassLoader, "net.jpountz.lz4.LZ4BlockInputStream", urls);
    // First create a FilterClassLoader that only loads JVM and Kafka classes from the system classloader.
    // This isolates the Scala library from the child classloader.
    ClassLoader parentClassLoader = new FilterClassLoader(systemClassLoader, KAFKA_FILTER);
    boolean rewriteCheckpointTempFileName = Boolean.parseBoolean(System.getProperty(SparkRuntimeUtils.STREAMING_CHECKPOINT_REWRITE_ENABLED, "false"));
    // Create the SparkContainerClassLoader for class rewriting; it will be used for the rest of the execution.
    // Use the filtered classloader created above as the parent instead of the system classloader, because the
    // Spark classes that need to be rewritten live in the system classloader.
    ClassLoader classLoader = new SparkContainerClassLoader(urls.toArray(new URL[0]), parentClassLoader, rewriteCheckpointTempFileName);
    // Set the context classloader and launch the actual Spark main class.
    Thread.currentThread().setContextClassLoader(classLoader);
    // Create the SLF4J logger from the context classloader. It has to be created from that classloader in order
    // for logs in this class to be in the same logging context as the one used in Spark.
    Object logger = createLogger(classLoader);
    // Install the JUL to SLF4J Bridge
    try {
        classLoader.loadClass(SLF4JBridgeHandler.class.getName()).getDeclaredMethod("install").invoke(null);
    } catch (Exception e) {
        // Log the error and continue
        log(logger, "warn", "Failed to invoke SLF4JBridgeHandler.install() required for jul-to-slf4j bridge", e);
    }
    // Get the SparkRuntimeContext to initialize all necessary services and the logging context.
    // This needs to be done through reflection using the rewriting classloader created above.
    Class<?> sparkRuntimeContextProviderClass = classLoader.loadClass(SparkRuntimeContextProvider.class.getName());
    if (masterEnvName != null) {
        sparkRuntimeContextProviderClass.getMethod("setMasterEnvName", String.class).invoke(null, masterEnvName);
    }
    Object sparkRuntimeContext = sparkRuntimeContextProviderClass.getMethod("get").invoke(null);
    if (sparkRuntimeContext instanceof Closeable) {
        System.setSecurityManager(new SparkRuntimeSecurityManager((Closeable) sparkRuntimeContext));
    }
    try {
        // For non-PySpark executions, redirect stdout/stderr to the logger. For PySpark, this is handled
        // in the PythonRunner/PythonWorkerFactory via SparkClassRewriter.
        if (!isPySpark()) {
            // Invoke StandardOutErrorRedirector.redirectToLogger()
            classLoader.loadClass(StandardOutErrorRedirector.class.getName()).getDeclaredMethod("redirectToLogger", String.class).invoke(null, mainClassName);
        }
        // The spark.executorEnv.CDAP_LOG_DIR property may have been resolved to the driver's log directory,
        // which causes executor logs to attempt to write to the driver log directory; reset it back to the
        // <LOG_DIR> placeholder.
        if (System.getProperty("spark.executorEnv.CDAP_LOG_DIR") != null) {
            System.setProperty("spark.executorEnv.CDAP_LOG_DIR", "<LOG_DIR>");
        }
        // Optionally starts Py4j Gateway server in the executor container
        Runnable stopGatewayServer = startGatewayServerIfNeeded(classLoader, logger);
        try {
            log(logger, "info", "Launch main class {}.main({})", mainClassName, Arrays.toString(args));
            classLoader.loadClass(mainClassName).getMethod("main", String[].class).invoke(null, new Object[] { args });
            log(logger, "info", "Main method returned {}", mainClassName);
        } finally {
            stopGatewayServer.run();
        }
    } catch (Throwable t) {
        // Log the exception since it will be propagated back to the JVM
        // and kill the main thread (hence the JVM process).
        // If we don't log it here as ERROR, it will only be logged by the UncaughtExceptionHandler at DEBUG level.
        log(logger, "error", "Exception raised when calling {}.main(String[]) method", mainClassName, t);
        throw t;
    } finally {
        if (sparkRuntimeContext instanceof Closeable) {
            Closeables.closeQuietly((Closeable) sparkRuntimeContext);
        }
    }
}
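
To show how this entry point is meant to be driven, here is a minimal, hypothetical sketch of a container main class that delegates to launch(). Only SparkContainerLauncher.launch() itself is real CDAP API; the wrapper class name, the launched Spark main class, and the system property used to pick the master environment are illustrative assumptions, not taken from the CDAP sources.

// Hypothetical container entry point; only SparkContainerLauncher.launch() is real CDAP API.
// (The import for SparkContainerLauncher is omitted because its package is not shown in this listing.)
public final class SparkDriverContainerMain {

  public static void main(String[] args) throws Exception {
    // Assumption: the launch script passes the master environment name through a system property;
    // a null value falls back to the default Hadoop implementations, per the Javadoc above.
    String masterEnvName = System.getProperty("cdap.spark.master.env");

    // Launch the real Spark main class through the SparkContainerClassLoader, dropping this
    // wrapper's jar from the rewritten classpath (removeMainClass = true).
    SparkContainerLauncher.launch("org.apache.spark.deploy.yarn.ApplicationMaster", args, true, masterEnvName);
  }
}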
Also used: SparkContainerClassLoader (io.cdap.cdap.app.runtime.spark.classloader.SparkContainerClassLoader), SparkRuntimeContextProvider (io.cdap.cdap.app.runtime.spark.SparkRuntimeContextProvider), FilterClassLoader (io.cdap.cdap.common.lang.FilterClassLoader), StandardOutErrorRedirector (io.cdap.cdap.common.logging.StandardOutErrorRedirector), UncaughtExceptionHandler (io.cdap.cdap.common.logging.common.UncaughtExceptionHandler), SLF4JBridgeHandler (org.slf4j.bridge.SLF4JBridgeHandler), Closeable (java.io.Closeable), IOException (java.io.IOException), MalformedURLException (java.net.MalformedURLException), URISyntaxException (java.net.URISyntaxException), URL (java.net.URL)
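
The classloader setup at the top of launch() follows a parent-filtering isolation pattern: wrap the system classloader in a filter so only selected classes (JVM and Kafka classes in the real code) leak through from the parent, then build a child classloader over the full classpath and install it as the thread's context classloader, so everything the filter hides is loaded from the child instead. The sketch below reproduces that pattern with plain JDK classes under stated assumptions: FilteringParent is a simplified stand-in for io.cdap.cdap.common.lang.FilterClassLoader, the package prefixes are illustrative, and no bytecode rewriting is performed.

import java.net.URL;
import java.net.URLClassLoader;

// Simplified stand-in for io.cdap.cdap.common.lang.FilterClassLoader: classes accepted by the
// package check are resolved through normal parent delegation; everything else is hidden so the
// child classloader has to supply its own copy.
final class FilteringParent extends ClassLoader {
  FilteringParent(ClassLoader parent) {
    super(parent);
  }

  @Override
  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
    // Assumption for illustration: expose only JVM and Kafka classes, hiding e.g. the Scala library.
    if (name.startsWith("java.") || name.startsWith("javax.") || name.startsWith("org.apache.kafka.")) {
      return super.loadClass(name, resolve);
    }
    throw new ClassNotFoundException(name + " is filtered out of the parent classloader");
  }
}

final class ClassLoaderIsolationDemo {
  static ClassLoader createIsolatedClassLoader(URL[] classpath) {
    // Filtered view of the system classloader, analogous to the FilterClassLoader in launch().
    ClassLoader parent = new FilteringParent(ClassLoader.getSystemClassLoader());
    // Child with the full classpath: parent-first delegation hits the filter, so anything the
    // filter rejects is loaded from the child's own URLs instead of the system classloader.
    URLClassLoader child = new URLClassLoader(classpath, parent);
    // Make it the context classloader for the rest of the execution, as launch() does.
    Thread.currentThread().setContextClassLoader(child);
    return child;
  }
}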
