use of org.apache.spark.SparkConf in project cdap by caskdata.
the class ClassicSparkProgram method main.
/**
 * Runs a small Spark job that sums the {@code id} field of ten generated records and
 * verifies the total.
 *
 * <p>The job is configured with the Kryo serializer and {@code MyKryoRegistrator}, so it
 * exercises serialization of {@code StructuredRecord} as well as the custom Kryo
 * registration used for {@code MyInt}.
 *
 * @param args command line arguments; not used
 * @throws Exception if the job fails or the computed sum is not 55
 */
public static void main(String[] args) throws Exception {
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  sparkConf.set("spark.kryo.registrator", MyKryoRegistrator.class.getName());

  Schema schema = Schema.recordOf("record",
      Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("id", Schema.of(Schema.Type.INT)));

  // Records carry ids 1..10, so the expected sum of ids is 55.
  List<StructuredRecord> records = new ArrayList<>();
  for (int i = 1; i <= 10; i++) {
    records.add(StructuredRecord.builder(schema).set("name", "Name" + i).set("id", i).build());
  }

  // This tests serialization of StructuredRecord as well as the use of a custom Kryo serializer.
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);
  int result;
  try {
    result = jsc.parallelize(records).mapToPair(new PairFunction<StructuredRecord, MyInt, StructuredRecord>() {
      @Override
      public Tuple2<MyInt, StructuredRecord> call(StructuredRecord record) throws Exception {
        return new Tuple2<>(new MyInt((Integer) record.get("id")), record);
      }
    }).map(new Function<Tuple2<MyInt, StructuredRecord>, MyInt>() {
      @Override
      public MyInt call(Tuple2<MyInt, StructuredRecord> tuple) throws Exception {
        return tuple._1;
      }
    }).reduce(new Function2<MyInt, MyInt, MyInt>() {
      @Override
      public MyInt call(MyInt v1, MyInt v2) throws Exception {
        return new MyInt(v1.toInt() + v2.toInt());
      }
    }).toInt();
  } finally {
    // Always release the context, including when the job itself throws.
    jsc.stop();
  }

  if (result != 55) {
    throw new Exception("Expected result to be 55 but got " + result);
  }
}
use of org.apache.spark.SparkConf in project cdap by caskdata.
the class CharCountProgram method initialize.
/**
 * Configures the Spark compression codec for this program and resets the
 * {@code total} column of the {@code total} row in the {@code totals} dataset to zero.
 *
 * @throws Exception if the context or the dataset operations fail
 */
@Override
public void initialize() throws Exception {
  SparkClientContext clientContext = getContext();

  SparkConf compressionConf = new SparkConf();
  compressionConf.set("spark.io.compression.codec", "org.apache.spark.io.LZFCompressionCodec");
  clientContext.setSparkConf(compressionConf);

  Table totalsTable = clientContext.getDataset("totals");

  // Read the current total first; the value itself is intentionally not used.
  Get totalQuery = new Get("total").add("total");
  totalsTable.get(totalQuery).getLong("total");

  // Then reset the stored total to zero.
  Put zeroTotal = new Put("total").add("total", 0L);
  totalsTable.put(zeroTotal);
}
use of org.apache.spark.SparkConf in project zeppelin by apache.
the class PySparkInterpreter method setupPySparkEnv.
/**
 * Builds the environment map returned by {@code EnvironmentUtils.getProcEnvironment()},
 * extended with a {@code PYTHONPATH} entry.
 *
 * <p>If {@code PYTHONPATH} is not already present it is initialised from
 * {@code spark.submit.pyFiles} (commas turned into {@code :}) followed by
 * {@code ../interpreter/lib/python}. When spark-submit is used outside of YARN mode, the
 * entries of {@code spark.jars} are appended as additional {@code :}-separated entries.
 *
 * @return the environment map including {@code PYTHONPATH}
 * @throws IOException if the process environment cannot be obtained
 */
private Map setupPySparkEnv() throws IOException {
  Map env = EnvironmentUtils.getProcEnvironment();
  if (!env.containsKey("PYTHONPATH")) {
    SparkConf conf = getSparkConf();
    env.put("PYTHONPATH", conf.get("spark.submit.pyFiles").replaceAll(",", ":") + ":../interpreter/lib/python");
  }
  // also, add all packages to PYTHONPATH since there might be transitive dependencies
  if (SparkInterpreter.useSparkSubmit() && !getSparkInterpreter().isYarnMode()) {
    // Fall back to "" so an unset spark.jars is treated as "no jars" instead of throwing.
    String sparkSubmitJars = getSparkConf().get("spark.jars", "").replace(",", ":");
    if (!sparkSubmitJars.isEmpty()) {
      // The separator is required; without it the last existing entry and the first jar
      // would be fused into a single, non-existent path.
      env.put("PYTHONPATH", env.get("PYTHONPATH") + ":" + sparkSubmitJars);
    }
  }
  return env;
}
use of org.apache.spark.SparkConf in project zeppelin by apache.
the class SparkInterpreter method open.
/**
 * Opens this interpreter.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>sets the YARN system property and performs the keytab login when configured;</li>
 *   <li>builds the Scala compiler {@code Settings} (arguments, classpath, class loader,
 *       REPL line-name prefix, maximum class file name length);</li>
 *   <li>creates the Scala REPL and a completer while holding {@code sharedInterpreterLock};</li>
 *   <li>obtains the Spark context, SQL context (and Spark session on Spark 2), creates the
 *       {@code ZeppelinContext} and binds all of them into the REPL;</li>
 *   <li>registers files from the dependency interpreter and the local repository with the
 *       Spark context;</li>
 *   <li>increments {@code numReferenceOfSparkContext}.</li>
 * </ol>
 *
 * @throws RuntimeException if the keytab login fails
 * @throws InterpreterException if the REPL {@code loadFiles} method cannot be invoked
 */
@Override
public void open() {
  // set properties and do login before creating any spark stuff for secured cluster
  if (isYarnMode()) {
    System.setProperty("SPARK_YARN_MODE", "true");
  }
  if (getProperty().containsKey("spark.yarn.keytab") && getProperty().containsKey("spark.yarn.principal")) {
    try {
      String keytab = getProperty().getProperty("spark.yarn.keytab");
      String principal = getProperty().getProperty("spark.yarn.principal");
      UserGroupInformation.loginUserFromKeytab(principal, keytab);
    } catch (IOException e) {
      throw new RuntimeException("Can not pass kerberos authentication", e);
    }
  }
  conf = new SparkConf();
  URL[] urls = getClassloaderUrls();
  // Very nice discussion about how scala compiler handle classpath
  // https://groups.google.com/forum/#!topic/scala-user/MlVwo2xCCI0
  /*
   * > val env = new nsc.Settings(errLogger) > env.usejavacp.value = true > val p = new
   * Interpreter(env) > p.setContextClassLoader > Alternatively you can set the class path through
   * nsc.Settings.classpath.
   *
   * >> val settings = new Settings() >> settings.usejavacp.value = true >>
   * settings.classpath.value += File.pathSeparator + >> System.getProperty("java.class.path") >>
   * val in = new Interpreter(settings) { >> override protected def parentClassLoader =
   * getClass.getClassLoader >> } >> in.setContextClassLoader()
   */
  Settings settings = new Settings();
  // process args
  String args = getProperty("args");
  if (args == null) {
    args = "";
  }
  String[] argsArray = args.split(" ");
  LinkedList<String> argList = new LinkedList<>();
  for (String arg : argsArray) {
    argList.add(arg);
  }
  // Collect the files provided by the dependency interpreter into a path-separated string.
  DepInterpreter depInterpreter = getDepInterpreter();
  String depInterpreterClasspath = "";
  if (depInterpreter != null) {
    SparkDependencyContext depc = depInterpreter.getDependencyContext();
    if (depc != null) {
      List<File> files = depc.getFiles();
      if (files != null) {
        for (File f : files) {
          if (depInterpreterClasspath.length() > 0) {
            depInterpreterClasspath += File.pathSeparator;
          }
          depInterpreterClasspath += f.getAbsolutePath();
        }
      }
    }
  }
  if (Utils.isScala2_10()) {
    // Scala 2.10: let SparkCommandLine (instantiated reflectively) build the settings.
    scala.collection.immutable.List<String> list = JavaConversions.asScalaBuffer(argList).toList();
    Object sparkCommandLine = Utils.instantiateClass("org.apache.spark.repl.SparkCommandLine", new Class[] { scala.collection.immutable.List.class }, new Object[] { list });
    settings = (Settings) Utils.invokeMethod(sparkCommandLine, "settings");
  } else {
    // Resolve the REPL class output directory: interpreter property first, then the
    // system property of the same name, finally the JVM temp directory.
    String sparkReplClassDir = getProperty("spark.repl.classdir");
    if (sparkReplClassDir == null) {
      sparkReplClassDir = System.getProperty("spark.repl.classdir");
    }
    if (sparkReplClassDir == null) {
      sparkReplClassDir = System.getProperty("java.io.tmpdir");
    }
    synchronized (sharedInterpreterLock) {
      if (outputDir == null) {
        outputDir = createTempDir(sparkReplClassDir);
      }
    }
    argList.add("-Yrepl-class-based");
    argList.add("-Yrepl-outdir");
    argList.add(outputDir.getAbsolutePath());
    // Classpath entries must be joined with File.pathSeparator (':' or ';'), not with
    // File.separator ('/' or '\'), which would glue all entries into one bogus path.
    String classpath = "";
    if (conf.contains("spark.jars")) {
      classpath = StringUtils.join(conf.get("spark.jars").split(","), File.pathSeparator);
    }
    if (!depInterpreterClasspath.isEmpty()) {
      if (!classpath.isEmpty()) {
        classpath += File.pathSeparator;
      }
      classpath += depInterpreterClasspath;
    }
    if (!classpath.isEmpty()) {
      argList.add("-classpath");
      argList.add(classpath);
    }
    scala.collection.immutable.List<String> list = JavaConversions.asScalaBuffer(argList).toList();
    settings.processArguments(list, true);
  }
  // set classpath for scala compiler
  PathSetting pathSettings = settings.classpath();
  String classpath = "";
  List<File> paths = currentClassPath();
  for (File f : paths) {
    if (classpath.length() > 0) {
      classpath += File.pathSeparator;
    }
    classpath += f.getAbsolutePath();
  }
  if (urls != null) {
    for (URL u : urls) {
      if (classpath.length() > 0) {
        classpath += File.pathSeparator;
      }
      classpath += u.getFile();
    }
  }
  // add dependency from DepInterpreter
  if (classpath.length() > 0) {
    classpath += File.pathSeparator;
  }
  classpath += depInterpreterClasspath;
  // add dependency from local repo
  String localRepo = getProperty("zeppelin.interpreter.localRepo");
  if (localRepo != null) {
    File localRepoDir = new File(localRepo);
    if (localRepoDir.exists()) {
      File[] files = localRepoDir.listFiles();
      if (files != null) {
        for (File f : files) {
          if (classpath.length() > 0) {
            classpath += File.pathSeparator;
          }
          classpath += f.getAbsolutePath();
        }
      }
    }
  }
  pathSettings.v_$eq(classpath);
  settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);
  // set classloader for scala compiler
  settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread().getContextClassLoader()));
  BooleanSetting b = (BooleanSetting) settings.usejavacp();
  b.v_$eq(true);
  settings.scala$tools$nsc$settings$StandardScalaSettings$_setter_$usejavacp_$eq(b);
  /* Required for scoped mode.
   * In scoped mode multiple scala compiler (repl) generates class in the same directory.
   * Class names is not randomly generated and look like '$line12.$read$$iw$$iw'
   * Therefore it's possible to generated class conflict(overwrite) with other repl generated
   * class.
   *
   * To prevent generated class name conflict,
   * change prefix of generated class name from each scala compiler (repl) instance.
   *
   * In Spark 2.x, REPL generated wrapper class name should compatible with the pattern
   * ^(\$line(?:\d+)\.\$read)(?:\$\$iw)+$
   *
   * As hashCode() can return a negative integer value and the minus character '-' is invalid
   * in a package name we change it to a numeric value '0' which still conforms to the regexp.
   *
   */
  System.setProperty("scala.repl.name.line", ("$line" + this.hashCode()).replace('-', '0'));
  // To prevent 'File name too long' error on some file system.
  MutableSettings.IntSetting numClassFileSetting = settings.maxClassfileName();
  numClassFileSetting.v_$eq(128);
  settings.scala$tools$nsc$settings$ScalaSettings$_setter_$maxClassfileName_$eq(numClassFileSetting);
  synchronized (sharedInterpreterLock) {
    /* create scala repl */
    if (printREPLOutput()) {
      this.interpreter = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(out));
    } else {
      this.interpreter = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(Console.out(), false));
    }
    interpreter.settings_$eq(settings);
    interpreter.createInterpreter();
    intp = Utils.invokeMethod(interpreter, "intp");
    Utils.invokeMethod(intp, "setContextClassLoader");
    Utils.invokeMethod(intp, "initializeSynchronous");
    if (Utils.isScala2_10()) {
      if (classOutputDir == null) {
        // First use: remember the output directory chosen by the settings.
        classOutputDir = settings.outputDirs().getSingleOutput().get();
      } else {
        // change SparkIMain class output dir
        settings.outputDirs().setSingleOutput(classOutputDir);
        ClassLoader cl = (ClassLoader) Utils.invokeMethod(intp, "classLoader");
        try {
          // Point the class loader's "root" field at the remembered output directory.
          Field rootField = cl.getClass().getSuperclass().getDeclaredField("root");
          rootField.setAccessible(true);
          rootField.set(cl, classOutputDir);
        } catch (NoSuchFieldException | IllegalAccessException e) {
          logger.error(e.getMessage(), e);
        }
      }
    }
    // Use the first completer implementation that can be found on the classpath.
    if (Utils.findClass("org.apache.spark.repl.SparkJLineCompletion", true) != null) {
      completer = Utils.instantiateClass("org.apache.spark.repl.SparkJLineCompletion", new Class[] { Utils.findClass("org.apache.spark.repl.SparkIMain") }, new Object[] { intp });
    } else if (Utils.findClass("scala.tools.nsc.interpreter.PresentationCompilerCompleter", true) != null) {
      completer = Utils.instantiateClass("scala.tools.nsc.interpreter.PresentationCompilerCompleter", new Class[] { IMain.class }, new Object[] { intp });
    } else if (Utils.findClass("scala.tools.nsc.interpreter.JLineCompletion", true) != null) {
      completer = Utils.instantiateClass("scala.tools.nsc.interpreter.JLineCompletion", new Class[] { IMain.class }, new Object[] { intp });
    }
    if (Utils.isSpark2()) {
      sparkSession = getSparkSession();
    }
    sc = getSparkContext();
    // Make sure a scheduler pool named "fair" exists.
    if (sc.getPoolForName("fair").isEmpty()) {
      Value schedulingMode = org.apache.spark.scheduler.SchedulingMode.FAIR();
      int minimumShare = 0;
      int weight = 1;
      Pool pool = new Pool("fair", schedulingMode, minimumShare, weight);
      sc.taskScheduler().rootPool().addSchedulable(pool);
    }
    sparkVersion = SparkVersion.fromVersionString(sc.version());
    sqlc = getSQLContext();
    dep = getDependencyResolver();
    hooks = getInterpreterGroup().getInterpreterHookRegistry();
    z = new ZeppelinContext(sc, sqlc, null, dep, hooks, Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
    // Define a map inside the REPL and use it to hand Java-side objects over to Scala code.
    interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
    Map<String, Object> binder;
    if (Utils.isScala2_10()) {
      binder = (Map<String, Object>) getValue("_binder");
    } else {
      binder = (Map<String, Object>) getLastObject();
    }
    binder.put("sc", sc);
    binder.put("sqlc", sqlc);
    binder.put("z", z);
    if (Utils.isSpark2()) {
      binder.put("spark", sparkSession);
    }
    interpret("@transient val z = " + "_binder.get(\"z\").asInstanceOf[org.apache.zeppelin.spark.ZeppelinContext]");
    interpret("@transient val sc = " + "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
    interpret("@transient val sqlc = " + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
    interpret("@transient val sqlContext = " + "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
    if (Utils.isSpark2()) {
      interpret("@transient val spark = " + "_binder.get(\"spark\").asInstanceOf[org.apache.spark.sql.SparkSession]");
    }
    interpret("import org.apache.spark.SparkContext._");
    if (importImplicit()) {
      if (Utils.isSpark2()) {
        interpret("import spark.implicits._");
        interpret("import spark.sql");
        interpret("import org.apache.spark.sql.functions._");
      } else {
        if (sparkVersion.oldSqlContextImplicits()) {
          interpret("import sqlContext._");
        } else {
          interpret("import sqlContext.implicits._");
          interpret("import sqlContext.sql");
          interpret("import org.apache.spark.sql.functions._");
        }
      }
    }
  }
  if (Utils.isScala2_10()) {
    // The loadFiles method name depends on the Spark version, so it is looked up reflectively.
    try {
      if (sparkVersion.oldLoadFilesMethodName()) {
        Method loadFiles = this.interpreter.getClass().getMethod("loadFiles", Settings.class);
        loadFiles.invoke(this.interpreter, settings);
      } else {
        Method loadFiles = this.interpreter.getClass().getMethod("org$apache$spark$repl$SparkILoop$$loadFiles", Settings.class);
        loadFiles.invoke(this.interpreter, settings);
      }
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
      throw new InterpreterException(e);
    }
  }
  // add jar from DepInterpreter
  if (depInterpreter != null) {
    SparkDependencyContext depc = depInterpreter.getDependencyContext();
    if (depc != null) {
      List<File> files = depc.getFilesDist();
      if (files != null) {
        for (File f : files) {
          if (f.getName().toLowerCase().endsWith(".jar")) {
            sc.addJar(f.getAbsolutePath());
            logger.info("sc.addJar(" + f.getAbsolutePath() + ")");
          } else {
            sc.addFile(f.getAbsolutePath());
            logger.info("sc.addFile(" + f.getAbsolutePath() + ")");
          }
        }
      }
    }
  }
  // add jar from local repo
  if (localRepo != null) {
    File localRepoDir = new File(localRepo);
    if (localRepoDir.exists()) {
      File[] files = localRepoDir.listFiles();
      if (files != null) {
        for (File f : files) {
          if (f.getName().toLowerCase().endsWith(".jar")) {
            sc.addJar(f.getAbsolutePath());
            logger.info("sc.addJar(" + f.getAbsolutePath() + ")");
          } else {
            sc.addFile(f.getAbsolutePath());
            logger.info("sc.addFile(" + f.getAbsolutePath() + ")");
          }
        }
      }
    }
  }
  numReferenceOfSparkContext.incrementAndGet();
}
use of org.apache.spark.SparkConf in project zeppelin by apache.
the class SparkInterpreterTest method emptyConfigurationVariablesOnlyForNonSparkProperties.
/**
 * Checks every interpreter property whose key starts with {@code spark.} and whose value
 * is empty: such a key must either be absent from the Spark configuration or have a
 * non-empty value there.
 */
@Test
public void emptyConfigurationVariablesOnlyForNonSparkProperties() {
  Properties interpreterProps = repl.getProperty();
  SparkConf activeConf = repl.getSparkContext().getConf();

  for (Object rawName : interpreterProps.keySet()) {
    String name = (String) rawName;
    String setting = (String) interpreterProps.get(name);
    LOGGER.debug(String.format("[%s]: [%s]", name, setting));

    // Only empty-valued "spark." properties are of interest.
    if (!name.startsWith("spark.") || !setting.isEmpty()) {
      continue;
    }

    String failureMessage =
        String.format("configuration starting from 'spark.' should not be empty. [%s]", name);
    boolean absentOrNonEmpty = !activeConf.contains(name) || !activeConf.get(name).isEmpty();
    assertTrue(failureMessage, absentOrNonEmpty);
  }
}
Aggregations