Search in sources :

Example 1 with Pool

Use of org.apache.spark.scheduler.Pool in the Apache Zeppelin project.

The open method of the class SparkInterpreter.

/**
 * Opens this interpreter.
 *
 * <p>In order, this method: logs in from a keytab when both {@code spark.yarn.keytab} and
 * {@code spark.yarn.principal} are configured; builds the Scala compiler {@link Settings}
 * (arguments, classpath, parent class loader, REPL class-name prefix); creates the Spark REPL
 * under {@code sharedInterpreterLock}; obtains the SparkContext / SQLContext (and SparkSession
 * on Spark 2) and binds them into the REPL; and finally registers the files provided by the
 * DepInterpreter and the local repository with the SparkContext.
 *
 * @throws RuntimeException if the keytab login fails with an {@link IOException}
 * @throws InterpreterException if the reflective {@code loadFiles} call fails (Scala 2.10 only)
 */
@Override
public void open() {
    // set properties and do login before creating any spark stuff for secured cluster
    if (isYarnMode()) {
        System.setProperty("SPARK_YARN_MODE", "true");
    }
    if (getProperty().containsKey("spark.yarn.keytab") && getProperty().containsKey("spark.yarn.principal")) {
        try {
            String keytab = getProperty().getProperty("spark.yarn.keytab");
            String principal = getProperty().getProperty("spark.yarn.principal");
            UserGroupInformation.loginUserFromKeytab(principal, keytab);
        } catch (IOException e) {
            throw new RuntimeException("Can not pass kerberos authentication", e);
        }
    }
    conf = new SparkConf();
    URL[] urls = getClassloaderUrls();
    // Very nice discussion about how scala compiler handle classpath
    // https://groups.google.com/forum/#!topic/scala-user/MlVwo2xCCI0
    /*
     * > val env = new nsc.Settings(errLogger) > env.usejavacp.value = true > val p = new
     * Interpreter(env) > p.setContextClassLoader > Alternatively you can set the class path through
     * nsc.Settings.classpath.
     *
     * >> val settings = new Settings() >> settings.usejavacp.value = true >>
     * settings.classpath.value += File.pathSeparator + >> System.getProperty("java.class.path") >>
     * val in = new Interpreter(settings) { >> override protected def parentClassLoader =
     * getClass.getClassLoader >> } >> in.setContextClassLoader()
     */
    Settings settings = new Settings();
    // process args
    String args = getProperty("args");
    if (args == null) {
        args = "";
    }
    String[] argsArray = args.split(" ");
    LinkedList<String> argList = new LinkedList<>();
    for (String arg : argsArray) {
        argList.add(arg);
    }
    // Collect the files resolved by the DepInterpreter (if any) into one classpath string.
    DepInterpreter depInterpreter = getDepInterpreter();
    String depInterpreterClasspath = "";
    if (depInterpreter != null) {
        SparkDependencyContext depc = depInterpreter.getDependencyContext();
        if (depc != null) {
            List<File> files = depc.getFiles();
            if (files != null) {
                for (File f : files) {
                    depInterpreterClasspath = appendClasspathEntry(depInterpreterClasspath, f.getAbsolutePath());
                }
            }
        }
    }
    if (Utils.isScala2_10()) {
        scala.collection.immutable.List<String> list = JavaConversions.asScalaBuffer(argList).toList();
        Object sparkCommandLine = Utils.instantiateClass("org.apache.spark.repl.SparkCommandLine", new Class[] { scala.collection.immutable.List.class }, new Object[] { list });
        settings = (Settings) Utils.invokeMethod(sparkCommandLine, "settings");
    } else {
        // Resolve the REPL class output directory: interpreter property first, then the system
        // property of the same name, then the JVM temp directory.
        String sparkReplClassDir = getProperty("spark.repl.classdir");
        if (sparkReplClassDir == null) {
            sparkReplClassDir = System.getProperty("spark.repl.classdir");
        }
        if (sparkReplClassDir == null) {
            sparkReplClassDir = System.getProperty("java.io.tmpdir");
        }
        synchronized (sharedInterpreterLock) {
            if (outputDir == null) {
                outputDir = createTempDir(sparkReplClassDir);
            }
        }
        argList.add("-Yrepl-class-based");
        argList.add("-Yrepl-outdir");
        argList.add(outputDir.getAbsolutePath());
        // Classpath entries must be joined with File.pathSeparator (':' or ';'), not with
        // File.separator ('/' or '\'), otherwise several jars collapse into one bogus path.
        String classpath = "";
        if (conf.contains("spark.jars")) {
            classpath = StringUtils.join(conf.get("spark.jars").split(","), File.pathSeparator);
        }
        if (!depInterpreterClasspath.isEmpty()) {
            classpath = appendClasspathEntry(classpath, depInterpreterClasspath);
        }
        if (!classpath.isEmpty()) {
            argList.add("-classpath");
            argList.add(classpath);
        }
        scala.collection.immutable.List<String> list = JavaConversions.asScalaBuffer(argList).toList();
        settings.processArguments(list, true);
    }
    // set classpath for scala compiler
    PathSetting pathSettings = settings.classpath();
    String classpath = "";
    List<File> paths = currentClassPath();
    for (File f : paths) {
        classpath = appendClasspathEntry(classpath, f.getAbsolutePath());
    }
    if (urls != null) {
        for (URL u : urls) {
            classpath = appendClasspathEntry(classpath, u.getFile());
        }
    }
    // add dependency from DepInterpreter (skipped when empty so that no empty entry is added)
    if (!depInterpreterClasspath.isEmpty()) {
        classpath = appendClasspathEntry(classpath, depInterpreterClasspath);
    }
    // add dependency from local repo
    String localRepo = getProperty("zeppelin.interpreter.localRepo");
    if (localRepo != null) {
        File localRepoDir = new File(localRepo);
        if (localRepoDir.exists()) {
            File[] files = localRepoDir.listFiles();
            if (files != null) {
                for (File f : files) {
                    classpath = appendClasspathEntry(classpath, f.getAbsolutePath());
                }
            }
        }
    }
    pathSettings.v_$eq(classpath);
    settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);
    // set classloader for scala compiler
    settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread().getContextClassLoader()));
    BooleanSetting b = (BooleanSetting) settings.usejavacp();
    b.v_$eq(true);
    settings.scala$tools$nsc$settings$StandardScalaSettings$_setter_$usejavacp_$eq(b);
    /* Required for scoped mode.
     * In scoped mode multiple scala compilers (repl) generate classes in the same directory.
     * Class names are not randomly generated and look like '$line12.$read$$iw$$iw'.
     * Therefore a generated class may conflict with (overwrite) a class generated by
     * another repl.
     *
     * To prevent generated class name conflicts,
     * change the prefix of generated class names for each scala compiler (repl) instance.
     *
     * In Spark 2.x, the REPL generated wrapper class name should be compatible with the pattern
     * ^(\$line(?:\d+)\.\$read)(?:\$\$iw)+$
     *
     * As hashCode() can return a negative integer value and the minus character '-' is invalid
     * in a package name we change it to a numeric value '0' which still conforms to the regexp.
     *
     */
    System.setProperty("scala.repl.name.line", ("$line" + this.hashCode()).replace('-', '0'));
    // To prevent 'File name too long' error on some file system.
    MutableSettings.IntSetting numClassFileSetting = settings.maxClassfileName();
    numClassFileSetting.v_$eq(128);
    settings.scala$tools$nsc$settings$ScalaSettings$_setter_$maxClassfileName_$eq(numClassFileSetting);
    synchronized (sharedInterpreterLock) {
        /* create scala repl */
        if (printREPLOutput()) {
            this.interpreter = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(out));
        } else {
            this.interpreter = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(Console.out(), false));
        }
        interpreter.settings_$eq(settings);
        interpreter.createInterpreter();
        intp = Utils.invokeMethod(interpreter, "intp");
        Utils.invokeMethod(intp, "setContextClassLoader");
        Utils.invokeMethod(intp, "initializeSynchronous");
        if (Utils.isScala2_10()) {
            if (classOutputDir == null) {
                // first interpreter: remember its output dir so later ones can reuse it
                classOutputDir = settings.outputDirs().getSingleOutput().get();
            } else {
                // change SparkIMain class output dir
                settings.outputDirs().setSingleOutput(classOutputDir);
                ClassLoader cl = (ClassLoader) Utils.invokeMethod(intp, "classLoader");
                try {
                    // reflectively point the class loader's "root" field at the shared dir
                    Field rootField = cl.getClass().getSuperclass().getDeclaredField("root");
                    rootField.setAccessible(true);
                    rootField.set(cl, classOutputDir);
                } catch (NoSuchFieldException | IllegalAccessException e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
        // Pick the first completer implementation that is present on the classpath.
        if (Utils.findClass("org.apache.spark.repl.SparkJLineCompletion", true) != null) {
            completer = Utils.instantiateClass("org.apache.spark.repl.SparkJLineCompletion", new Class[] { Utils.findClass("org.apache.spark.repl.SparkIMain") }, new Object[] { intp });
        } else if (Utils.findClass("scala.tools.nsc.interpreter.PresentationCompilerCompleter", true) != null) {
            completer = Utils.instantiateClass("scala.tools.nsc.interpreter.PresentationCompilerCompleter", new Class[] { IMain.class }, new Object[] { intp });
        } else if (Utils.findClass("scala.tools.nsc.interpreter.JLineCompletion", true) != null) {
            completer = Utils.instantiateClass("scala.tools.nsc.interpreter.JLineCompletion", new Class[] { IMain.class }, new Object[] { intp });
        }
        if (Utils.isSpark2()) {
            sparkSession = getSparkSession();
        }
        sc = getSparkContext();
        // Register a FAIR scheduling pool named "fair" unless one already exists.
        if (sc.getPoolForName("fair").isEmpty()) {
            Value schedulingMode = org.apache.spark.scheduler.SchedulingMode.FAIR();
            int minimumShare = 0;
            int weight = 1;
            Pool pool = new Pool("fair", schedulingMode, minimumShare, weight);
            sc.taskScheduler().rootPool().addSchedulable(pool);
        }
        sparkVersion = SparkVersion.fromVersionString(sc.version());
        sqlc = getSQLContext();
        dep = getDependencyResolver();
        hooks = getInterpreterGroup().getInterpreterHookRegistry();
        z = new ZeppelinContext(sc, sqlc, null, dep, hooks, Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
        // Hand the Java-side objects to the REPL through a map created inside the REPL itself.
        interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
        Map<String, Object> binder;
        if (Utils.isScala2_10()) {
            binder = (Map<String, Object>) getValue("_binder");
        } else {
            binder = (Map<String, Object>) getLastObject();
        }
        binder.put("sc", sc);
        binder.put("sqlc", sqlc);
        binder.put("z", z);
        if (Utils.isSpark2()) {
            binder.put("spark", sparkSession);
        }
        interpret("@transient val z = " + "_binder.get(\"z\").asInstanceOf[org.apache.zeppelin.spark.ZeppelinContext]");
        interpret("@transient val sc = " + "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
        interpret("@transient val sqlc = " + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
        interpret("@transient val sqlContext = " + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
        if (Utils.isSpark2()) {
            interpret("@transient val spark = " + "_binder.get(\"spark\").asInstanceOf[org.apache.spark.sql.SparkSession]");
        }
        interpret("import org.apache.spark.SparkContext._");
        if (importImplicit()) {
            if (Utils.isSpark2()) {
                interpret("import spark.implicits._");
                interpret("import spark.sql");
                interpret("import org.apache.spark.sql.functions._");
            } else {
                if (sparkVersion.oldSqlContextImplicits()) {
                    interpret("import sqlContext._");
                } else {
                    interpret("import sqlContext.implicits._");
                    interpret("import sqlContext.sql");
                    interpret("import org.apache.spark.sql.functions._");
                }
            }
        }
    }
    if (Utils.isScala2_10()) {
        try {
            // The loadFiles method name depends on the Spark version.
            if (sparkVersion.oldLoadFilesMethodName()) {
                Method loadFiles = this.interpreter.getClass().getMethod("loadFiles", Settings.class);
                loadFiles.invoke(this.interpreter, settings);
            } else {
                Method loadFiles = this.interpreter.getClass().getMethod("org$apache$spark$repl$SparkILoop$$loadFiles", Settings.class);
                loadFiles.invoke(this.interpreter, settings);
            }
        } catch (NoSuchMethodException | SecurityException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
            throw new InterpreterException(e);
        }
    }
    // add jar from DepInterpreter
    if (depInterpreter != null) {
        SparkDependencyContext depc = depInterpreter.getDependencyContext();
        if (depc != null) {
            List<File> files = depc.getFilesDist();
            if (files != null) {
                for (File f : files) {
                    addJarOrFileToSparkContext(f);
                }
            }
        }
    }
    // add jar from local repo
    if (localRepo != null) {
        File localRepoDir = new File(localRepo);
        if (localRepoDir.exists()) {
            File[] files = localRepoDir.listFiles();
            if (files != null) {
                for (File f : files) {
                    addJarOrFileToSparkContext(f);
                }
            }
        }
    }
    numReferenceOfSparkContext.incrementAndGet();
}

/**
 * Appends {@code entry} to {@code classpath}, inserting {@link File#pathSeparator} between
 * them when {@code classpath} is not empty.
 */
private static String appendClasspathEntry(String classpath, String entry) {
    if (classpath.isEmpty()) {
        return entry;
    }
    return classpath + File.pathSeparator + entry;
}

/**
 * Registers {@code f} with the SparkContext: via {@code addJar} when its name ends with
 * ".jar" (case-insensitive), via {@code addFile} otherwise, and logs the call.
 */
private void addJarOrFileToSparkContext(File f) {
    if (f.getName().toLowerCase().endsWith(".jar")) {
        sc.addJar(f.getAbsolutePath());
        logger.info("sc.addJar(" + f.getAbsolutePath() + ")");
    } else {
        sc.addFile(f.getAbsolutePath());
        logger.info("sc.addFile(" + f.getAbsolutePath() + ")");
    }
}
Also used : scala(scala) InterpreterException(org.apache.zeppelin.interpreter.InterpreterException) SparkDependencyContext(org.apache.zeppelin.spark.dep.SparkDependencyContext) IMain(scala.tools.nsc.interpreter.IMain) URL(java.net.URL) Field(java.lang.reflect.Field) PathSetting(scala.tools.nsc.settings.MutableSettings.PathSetting) URLClassLoader(java.net.URLClassLoader) ResourcePool(org.apache.zeppelin.resource.ResourcePool) Pool(org.apache.spark.scheduler.Pool) MutableSettings(scala.tools.nsc.settings.MutableSettings) Settings(scala.tools.nsc.Settings) PrintWriter(java.io.PrintWriter) MutableSettings(scala.tools.nsc.settings.MutableSettings) BooleanSetting(scala.tools.nsc.settings.MutableSettings.BooleanSetting) SparkILoop(org.apache.spark.repl.SparkILoop) IOException(java.io.IOException) Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) Value(scala.Enumeration.Value) SparkConf(org.apache.spark.SparkConf) AbstractFile(scala.reflect.io.AbstractFile) File(java.io.File)

Aggregations

File (java.io.File)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 Field (java.lang.reflect.Field)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 URL (java.net.URL)1 URLClassLoader (java.net.URLClassLoader)1 SparkConf (org.apache.spark.SparkConf)1 SparkILoop (org.apache.spark.repl.SparkILoop)1 Pool (org.apache.spark.scheduler.Pool)1 InterpreterException (org.apache.zeppelin.interpreter.InterpreterException)1 ResourcePool (org.apache.zeppelin.resource.ResourcePool)1 SparkDependencyContext (org.apache.zeppelin.spark.dep.SparkDependencyContext)1 scala (scala)1 Value (scala.Enumeration.Value)1 AbstractFile (scala.reflect.io.AbstractFile)1 Settings (scala.tools.nsc.Settings)1 IMain (scala.tools.nsc.interpreter.IMain)1 MutableSettings (scala.tools.nsc.settings.MutableSettings)1