Use of org.apache.spark.SparkConf in project cdap by caskdata.
From class SparkClassLoader, method getSparkExecutionContext.
/**
 * Returns the {@link SparkExecutionContext}.
 *
 * @param createIfNotExists {@code true} to create a new {@link SparkExecutionContext} if one doesn't exist yet
 *                          in the current execution context. Only the {@link SparkMainWrapper} should
 *                          pass in {@code true}.
 */
public synchronized SparkExecutionContext getSparkExecutionContext(boolean createIfNotExists) {
  if (sparkExecutionContext != null) {
    return sparkExecutionContext;
  }
  if (!createIfNotExists) {
    throw new IllegalStateException("SparkExecutionContext does not exist. "
      + "This is caused by using SparkExecutionContext from a closure function executing in Spark executor process. "
      + "SparkExecutionContext can only be used in Spark driver process.");
  }
  SparkConf sparkConf = new SparkConf();
  File resourcesDir = new File(sparkConf.get("spark.local.dir", System.getProperty("user.dir")));
  sparkExecutionContext = new DefaultSparkExecutionContext(
    this, SparkRuntimeUtils.getLocalizedResources(resourcesDir, sparkConf));
  return sparkExecutionContext;
}
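The fallback in the last few lines relies on SparkConf.get(key, defaultValue), which returns the supplied default when the key is not set. A minimal, standalone sketch of that resolution (not CDAP code; the class name is made up for illustration):

import org.apache.spark.SparkConf;

import java.io.File;

public class LocalDirResolutionSketch {

  public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf();
    // Use spark.local.dir when configured; otherwise fall back to the process working directory.
    File resourcesDir = new File(sparkConf.get("spark.local.dir", System.getProperty("user.dir")));
    System.out.println("Localized resources directory: " + resourcesDir.getAbsolutePath());
  }
}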
Use of org.apache.spark.SparkConf in project cdap by caskdata.
From class SparkRuntimeService, method createSubmitConfigs.
/**
 * Creates the configurations for the Spark submitter.
 */
private Map<String, String> createSubmitConfigs(File localDir, String metricsConfPath, String classpath,
                                                Map<String, LocalizeResource> localizedResources,
                                                boolean localMode) throws Exception {
  Map<String, String> configs = new HashMap<>();

  // Make the Spark UI run on a random port. By default, the Spark UI runs on port 4040 and does a sequential
  // search for the next port if 4040 is already occupied. However, during that search it unnecessarily logs a
  // big stack trace at WARN level, which pollutes the logs a lot if there are concurrent Spark jobs running
  // (e.g. a fork in a Workflow).
  configs.put("spark.ui.port", "0");

  // Setup configs from the default spark conf
  Properties sparkDefaultConf = SparkPackageUtils.getSparkDefaultConf();
  configs.putAll(Maps.fromProperties(sparkDefaultConf));

  // Setup app.id and executor.id for the Metrics System
  configs.put("spark.app.id", context.getApplicationSpecification().getName());
  configs.put("spark.executor.id", context.getApplicationSpecification().getName());

  // Setup the resource requirements for driver and executor. The user can override them with the SparkConf.
  configs.put("spark.driver.memory", context.getDriverResources().getMemoryMB() + "m");
  configs.put("spark.driver.cores", String.valueOf(context.getDriverResources().getVirtualCores()));
  configs.put("spark.executor.memory", context.getExecutorResources().getMemoryMB() + "m");
  configs.put("spark.executor.cores", String.valueOf(context.getExecutorResources().getVirtualCores()));

  // Add user specified configs first. CDAP-specific configs will override them later if there are duplicates.
  SparkConf sparkConf = context.getSparkConf();
  if (sparkConf != null) {
    for (Tuple2<String, String> tuple : sparkConf.getAll()) {
      configs.put(tuple._1(), tuple._2());
    }
  }

  // In distributed (non-local) mode, prepend the launcher jar and the given classpath to the driver and
  // executor extra classpath so that both JVMs have these entries on their classpath.
  if (!localMode) {
    String extraClassPath = Paths.get("$PWD", CDAP_LAUNCHER_JAR) + File.pathSeparator + classpath;
    // Set the extraClassPath configs, appending any user specified extra classpath
    prependConfig(configs, "spark.driver.extraClassPath", extraClassPath, File.pathSeparator);
    prependConfig(configs, "spark.executor.extraClassPath", extraClassPath, File.pathSeparator);
  } else {
    // Only need to set this for local mode.
    // In distributed mode, Spark will not use this but instead use the yarn container directory.
    configs.put("spark.local.dir", localDir.getAbsolutePath());
  }

  configs.put("spark.metrics.conf", metricsConfPath);
  SparkRuntimeUtils.setLocalizedResources(localizedResources.keySet(), configs);
  return configs;
}
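The prependConfig helper is not shown in this snippet. Assuming it simply places the CDAP-provided value ahead of any user-specified value using the given separator, a possible shape could look like the following hypothetical sketch (not the actual CDAP implementation):

import java.io.File;
import java.util.HashMap;
import java.util.Map;

public class PrependConfigSketch {

  // Hypothetical helper: keeps any existing value for the key, but puts the given value first.
  static void prependConfig(Map<String, String> configs, String key, String value, String separator) {
    String existing = configs.get(key);
    configs.put(key, existing == null ? value : value + separator + existing);
  }

  public static void main(String[] args) {
    Map<String, String> configs = new HashMap<>();
    configs.put("spark.driver.extraClassPath", "/user/extra.jar");
    prependConfig(configs, "spark.driver.extraClassPath", "launcher.jar", File.pathSeparator);
    // Prints something like: launcher.jar:/user/extra.jar (the separator is platform dependent)
    System.out.println(configs.get("spark.driver.extraClassPath"));
  }
}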
Use of org.apache.spark.SparkConf in project cdap by caskdata.
From class SparkRuntimeUtilsTest, method testCreateConfArchive.
@Test
public void testCreateConfArchive() throws IOException {
  File confDir = TEMP_FOLDER.newFolder();
  File confFile = new File(confDir, "testing.conf");
  Files.write("Testing Message", confFile, Charsets.UTF_8);

  SparkConf conf = new SparkConf();
  conf.set("testing", "value");

  File archiveFile = SparkRuntimeUtils.createConfArchive(conf, "test.properties",
                                                         confDir.getAbsolutePath(),
                                                         TEMP_FOLDER.newFile().getAbsolutePath());
  try (ZipFile zipFile = new ZipFile(archiveFile)) {
    Properties properties = new Properties();
    try (InputStream is = zipFile.getInputStream(zipFile.getEntry("test.properties"))) {
      properties.load(is);
      Assert.assertEquals("value", properties.getProperty("testing"));
    }
    try (InputStream is = zipFile.getInputStream(zipFile.getEntry("testing.conf"))) {
      Assert.assertEquals("Testing Message", Bytes.toString(ByteStreams.toByteArray(is)));
    }
  }
}
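The test expects createConfArchive to write the SparkConf entries into a test.properties entry inside the archive. Assuming that behavior, the SparkConf API it depends on is getAll(), which returns every explicitly set entry as a scala Tuple2. A small standalone sketch (not CDAP code; the class name is invented) of turning a SparkConf into a java.util.Properties:

import org.apache.spark.SparkConf;
import scala.Tuple2;

import java.util.Properties;

public class SparkConfToPropertiesSketch {

  public static void main(String[] args) {
    SparkConf conf = new SparkConf();
    conf.set("testing", "value");

    Properties properties = new Properties();
    // getAll() returns every explicitly set (key, value) pair as a Tuple2.
    for (Tuple2<String, String> entry : conf.getAll()) {
      properties.setProperty(entry._1(), entry._2());
    }
    System.out.println(properties);
  }
}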