Use of org.apache.spark.scheduler.Pool in project zeppelin by apache.
The class SparkInterpreter, method open.
/**
 * Opens this interpreter.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>sets {@code SPARK_YARN_MODE} in yarn mode and logs in from the keytab when both
 *       {@code spark.yarn.keytab} and {@code spark.yarn.principal} are configured;</li>
 *   <li>builds the Scala compiler {@code Settings} (arguments, REPL output dir, classpath,
 *       parent class loader);</li>
 *   <li>creates the Scala REPL and a completer while holding {@code sharedInterpreterLock};</li>
 *   <li>obtains the SparkSession (Spark 2 only), SparkContext and SQLContext, registers a
 *       "fair" scheduler pool if none exists, and binds {@code sc}, {@code sqlc}, {@code z}
 *       (and {@code spark}) into the REPL;</li>
 *   <li>adds the files from the DepInterpreter and from the local repo to the SparkContext;</li>
 *   <li>increments {@code numReferenceOfSparkContext}.</li>
 * </ol>
 *
 * @throws RuntimeException if the keytab login fails with an {@link IOException}
 * @throws InterpreterException if the reflective {@code loadFiles} call fails (Scala 2.10 only)
 */
@Override
public void open() {
  // set properties and do login before creating any spark stuff for secured cluster
  if (isYarnMode()) {
    System.setProperty("SPARK_YARN_MODE", "true");
  }
  if (getProperty().containsKey("spark.yarn.keytab")
      && getProperty().containsKey("spark.yarn.principal")) {
    try {
      String keytab = getProperty().getProperty("spark.yarn.keytab");
      String principal = getProperty().getProperty("spark.yarn.principal");
      UserGroupInformation.loginUserFromKeytab(principal, keytab);
    } catch (IOException e) {
      throw new RuntimeException("Can not pass kerberos authentication", e);
    }
  }

  conf = new SparkConf();
  URL[] urls = getClassloaderUrls();

  // Very nice discussion about how scala compiler handle classpath
  // https://groups.google.com/forum/#!topic/scala-user/MlVwo2xCCI0
  /*
   * > val env = new nsc.Settings(errLogger) > env.usejavacp.value = true > val p = new
   * Interpreter(env) > p.setContextClassLoader > Alternatively you can set the class path through
   * nsc.Settings.classpath.
   *
   * >> val settings = new Settings() >> settings.usejavacp.value = true >>
   * settings.classpath.value += File.pathSeparator + >> System.getProperty("java.class.path") >>
   * val in = new Interpreter(settings) { >> override protected def parentClassLoader =
   * getClass.getClassLoader >> } >> in.setContextClassLoader()
   */
  Settings settings = new Settings();

  // process args
  String args = getProperty("args");
  if (args == null) {
    args = "";
  }
  String[] argsArray = args.split(" ");
  LinkedList<String> argList = new LinkedList<>();
  for (String arg : argsArray) {
    argList.add(arg);
  }

  // Collect the files resolved by the DepInterpreter (if any) as a classpath string.
  DepInterpreter depInterpreter = getDepInterpreter();
  String depInterpreterClasspath = "";
  if (depInterpreter != null) {
    SparkDependencyContext depc = depInterpreter.getDependencyContext();
    if (depc != null) {
      List<File> files = depc.getFiles();
      if (files != null) {
        for (File f : files) {
          if (depInterpreterClasspath.length() > 0) {
            depInterpreterClasspath += File.pathSeparator;
          }
          depInterpreterClasspath += f.getAbsolutePath();
        }
      }
    }
  }

  if (Utils.isScala2_10()) {
    scala.collection.immutable.List<String> list =
        JavaConversions.asScalaBuffer(argList).toList();
    Object sparkCommandLine = Utils.instantiateClass(
        "org.apache.spark.repl.SparkCommandLine",
        new Class[] { scala.collection.immutable.List.class },
        new Object[] { list });
    settings = (Settings) Utils.invokeMethod(sparkCommandLine, "settings");
  } else {
    // Resolve the parent directory for REPL class output: interpreter property first,
    // then the system property, then the JVM temp dir.
    String sparkReplClassDir = getProperty("spark.repl.classdir");
    if (sparkReplClassDir == null) {
      sparkReplClassDir = System.getProperty("spark.repl.classdir");
    }
    if (sparkReplClassDir == null) {
      sparkReplClassDir = System.getProperty("java.io.tmpdir");
    }

    synchronized (sharedInterpreterLock) {
      if (outputDir == null) {
        outputDir = createTempDir(sparkReplClassDir);
      }
    }
    argList.add("-Yrepl-class-based");
    argList.add("-Yrepl-outdir");
    argList.add(outputDir.getAbsolutePath());

    // Classpath entries must be joined with File.pathSeparator (':' or ';').
    // File.separator is the directory separator and would fuse all entries into one path.
    String replClasspath = "";
    if (conf.contains("spark.jars")) {
      replClasspath = StringUtils.join(conf.get("spark.jars").split(","), File.pathSeparator);
    }
    if (!depInterpreterClasspath.isEmpty()) {
      if (!replClasspath.isEmpty()) {
        replClasspath += File.pathSeparator;
      }
      replClasspath += depInterpreterClasspath;
    }
    if (!replClasspath.isEmpty()) {
      argList.add("-classpath");
      argList.add(replClasspath);
    }

    scala.collection.immutable.List<String> list =
        JavaConversions.asScalaBuffer(argList).toList();
    settings.processArguments(list, true);
  }

  // set classpath for scala compiler
  PathSetting pathSettings = settings.classpath();
  String classpath = "";
  List<File> paths = currentClassPath();
  for (File f : paths) {
    if (classpath.length() > 0) {
      classpath += File.pathSeparator;
    }
    classpath += f.getAbsolutePath();
  }
  if (urls != null) {
    for (URL u : urls) {
      if (classpath.length() > 0) {
        classpath += File.pathSeparator;
      }
      classpath += u.getFile();
    }
  }

  // add dependency from DepInterpreter (skipped when empty so that no empty
  // classpath entry is produced)
  if (!depInterpreterClasspath.isEmpty()) {
    if (classpath.length() > 0) {
      classpath += File.pathSeparator;
    }
    classpath += depInterpreterClasspath;
  }

  // add dependency from local repo
  String localRepo = getProperty("zeppelin.interpreter.localRepo");
  if (localRepo != null) {
    File localRepoDir = new File(localRepo);
    if (localRepoDir.exists()) {
      File[] files = localRepoDir.listFiles();
      if (files != null) {
        for (File f : files) {
          if (classpath.length() > 0) {
            classpath += File.pathSeparator;
          }
          classpath += f.getAbsolutePath();
        }
      }
    }
  }

  pathSettings.v_$eq(classpath);
  settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);

  // set classloader for scala compiler
  settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread().getContextClassLoader()));
  BooleanSetting b = (BooleanSetting) settings.usejavacp();
  b.v_$eq(true);
  settings.scala$tools$nsc$settings$StandardScalaSettings$_setter_$usejavacp_$eq(b);

  /* Required for scoped mode.
   * In scoped mode multiple scala compilers (repl) generate classes in the same directory.
   * Class names are not randomly generated and look like '$line12.$read$$iw$$iw',
   * so a generated class may conflict with (overwrite) a class generated by another repl.
   *
   * To prevent generated class name conflicts,
   * change the prefix of generated class names for each scala compiler (repl) instance.
   *
   * In Spark 2.x, the REPL generated wrapper class name should be compatible with the pattern
   * ^(\$line(?:\d+)\.\$read)(?:\$\$iw)+$
   *
   * As hashCode() can return a negative integer value and the minus character '-' is invalid
   * in a package name we change it to a numeric value '0' which still conforms to the regexp.
   */
  System.setProperty("scala.repl.name.line", ("$line" + this.hashCode()).replace('-', '0'));

  // To prevent 'File name too long' error on some file system.
  MutableSettings.IntSetting numClassFileSetting = settings.maxClassfileName();
  numClassFileSetting.v_$eq(128);
  settings.scala$tools$nsc$settings$ScalaSettings$_setter_$maxClassfileName_$eq(
      numClassFileSetting);

  synchronized (sharedInterpreterLock) {
    /* create scala repl */
    if (printREPLOutput()) {
      this.interpreter = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(out));
    } else {
      this.interpreter = new SparkILoop((java.io.BufferedReader) null,
          new PrintWriter(Console.out(), false));
    }

    interpreter.settings_$eq(settings);
    interpreter.createInterpreter();

    intp = Utils.invokeMethod(interpreter, "intp");
    Utils.invokeMethod(intp, "setContextClassLoader");
    Utils.invokeMethod(intp, "initializeSynchronous");

    if (Utils.isScala2_10()) {
      if (classOutputDir == null) {
        // first interpreter: remember its output dir so later ones can reuse it
        classOutputDir = settings.outputDirs().getSingleOutput().get();
      } else {
        // change SparkIMain class output dir
        settings.outputDirs().setSingleOutput(classOutputDir);
        ClassLoader cl = (ClassLoader) Utils.invokeMethod(intp, "classLoader");
        try {
          // overwrite the "root" field declared on the class loader's superclass
          Field rootField = cl.getClass().getSuperclass().getDeclaredField("root");
          rootField.setAccessible(true);
          rootField.set(cl, classOutputDir);
        } catch (NoSuchFieldException | IllegalAccessException e) {
          logger.error(e.getMessage(), e);
        }
      }
    }

    // Pick the first completer implementation that is available on the classpath.
    if (Utils.findClass("org.apache.spark.repl.SparkJLineCompletion", true) != null) {
      completer = Utils.instantiateClass(
          "org.apache.spark.repl.SparkJLineCompletion",
          new Class[] { Utils.findClass("org.apache.spark.repl.SparkIMain") },
          new Object[] { intp });
    } else if (Utils.findClass(
        "scala.tools.nsc.interpreter.PresentationCompilerCompleter", true) != null) {
      completer = Utils.instantiateClass(
          "scala.tools.nsc.interpreter.PresentationCompilerCompleter",
          new Class[] { IMain.class },
          new Object[] { intp });
    } else if (Utils.findClass(
        "scala.tools.nsc.interpreter.JLineCompletion", true) != null) {
      completer = Utils.instantiateClass(
          "scala.tools.nsc.interpreter.JLineCompletion",
          new Class[] { IMain.class },
          new Object[] { intp });
    }

    if (Utils.isSpark2()) {
      sparkSession = getSparkSession();
    }
    sc = getSparkContext();
    // register a FAIR scheduling pool named "fair" if the context does not have one yet
    if (sc.getPoolForName("fair").isEmpty()) {
      Value schedulingMode = org.apache.spark.scheduler.SchedulingMode.FAIR();
      int minimumShare = 0;
      int weight = 1;
      Pool pool = new Pool("fair", schedulingMode, minimumShare, weight);
      sc.taskScheduler().rootPool().addSchedulable(pool);
    }

    sparkVersion = SparkVersion.fromVersionString(sc.version());

    sqlc = getSQLContext();

    dep = getDependencyResolver();

    hooks = getInterpreterGroup().getInterpreterHookRegistry();

    z = new ZeppelinContext(sc, sqlc, null, dep, hooks,
        Integer.parseInt(getProperty("zeppelin.spark.maxResult")));

    // Create a map inside the REPL and use it to hand Java-side objects over to Scala code.
    interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
    Map<String, Object> binder;
    if (Utils.isScala2_10()) {
      binder = (Map<String, Object>) getValue("_binder");
    } else {
      binder = (Map<String, Object>) getLastObject();
    }
    binder.put("sc", sc);
    binder.put("sqlc", sqlc);
    binder.put("z", z);

    if (Utils.isSpark2()) {
      binder.put("spark", sparkSession);
    }

    interpret("@transient val z = "
        + "_binder.get(\"z\").asInstanceOf[org.apache.zeppelin.spark.ZeppelinContext]");
    interpret("@transient val sc = "
        + "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
    interpret("@transient val sqlc = "
        + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
    interpret("@transient val sqlContext = "
        + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");

    if (Utils.isSpark2()) {
      interpret("@transient val spark = "
          + "_binder.get(\"spark\").asInstanceOf[org.apache.spark.sql.SparkSession]");
    }

    interpret("import org.apache.spark.SparkContext._");

    if (importImplicit()) {
      if (Utils.isSpark2()) {
        interpret("import spark.implicits._");
        interpret("import spark.sql");
        interpret("import org.apache.spark.sql.functions._");
      } else {
        if (sparkVersion.oldSqlContextImplicits()) {
          interpret("import sqlContext._");
        } else {
          interpret("import sqlContext.implicits._");
          interpret("import sqlContext.sql");
          interpret("import org.apache.spark.sql.functions._");
        }
      }
    }
  }

  if (Utils.isScala2_10()) {
    // The loadFiles method name depends on the Spark version, so it is looked up reflectively.
    try {
      if (sparkVersion.oldLoadFilesMethodName()) {
        Method loadFiles = this.interpreter.getClass().getMethod("loadFiles", Settings.class);
        loadFiles.invoke(this.interpreter, settings);
      } else {
        Method loadFiles = this.interpreter.getClass().getMethod(
            "org$apache$spark$repl$SparkILoop$$loadFiles", Settings.class);
        loadFiles.invoke(this.interpreter, settings);
      }
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException
        | IllegalArgumentException | InvocationTargetException e) {
      throw new InterpreterException(e);
    }
  }

  // add jar from DepInterpreter
  if (depInterpreter != null) {
    SparkDependencyContext depc = depInterpreter.getDependencyContext();
    if (depc != null) {
      List<File> files = depc.getFilesDist();
      if (files != null) {
        for (File f : files) {
          if (f.getName().toLowerCase().endsWith(".jar")) {
            sc.addJar(f.getAbsolutePath());
            logger.info("sc.addJar(" + f.getAbsolutePath() + ")");
          } else {
            sc.addFile(f.getAbsolutePath());
            logger.info("sc.addFile(" + f.getAbsolutePath() + ")");
          }
        }
      }
    }
  }

  // add jar from local repo
  if (localRepo != null) {
    File localRepoDir = new File(localRepo);
    if (localRepoDir.exists()) {
      File[] files = localRepoDir.listFiles();
      if (files != null) {
        for (File f : files) {
          if (f.getName().toLowerCase().endsWith(".jar")) {
            sc.addJar(f.getAbsolutePath());
            logger.info("sc.addJar(" + f.getAbsolutePath() + ")");
          } else {
            sc.addFile(f.getAbsolutePath());
            logger.info("sc.addFile(" + f.getAbsolutePath() + ")");
          }
        }
      }
    }
  }

  numReferenceOfSparkContext.incrementAndGet();
}
Aggregations