Example 81 with SparkConf

use of org.apache.spark.SparkConf in project cdap by caskdata.

the class SparkClassLoader method getSparkExecutionContext.

/**
   * Returns the {@link SparkExecutionContext}.
   *
   * @param createIfNotExists {@code true} to create a new {@link SparkExecutionContext} if one doesn't exist yet
   *                                      in the current execution context. Only the {@link SparkMainWrapper} should
   *                                      pass in {@code true}.
   */
public synchronized SparkExecutionContext getSparkExecutionContext(boolean createIfNotExists) {
    if (sparkExecutionContext != null) {
        return sparkExecutionContext;
    }
    if (!createIfNotExists) {
        throw new IllegalStateException("SparkExecutionContext does not exist. "
            + "This is caused by using SparkExecutionContext from a "
            + "closure function executing in Spark executor process. "
            + "SparkExecutionContext can only be used in Spark driver process.");
    }
    SparkConf sparkConf = new SparkConf();
    File resourcesDir = new File(sparkConf.get("spark.local.dir", System.getProperty("user.dir")));
    sparkExecutionContext = new DefaultSparkExecutionContext(this, SparkRuntimeUtils.getLocalizedResources(resourcesDir, sparkConf));
    return sparkExecutionContext;
}
Also used : SparkConf(org.apache.spark.SparkConf) File(java.io.File)
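
For context, here is a minimal sketch of how the two call sites implied by the Javadoc above might look. The surrounding class, the helper names, and the way the SparkClassLoader instance is obtained are assumptions for illustration, not CDAP API; only getSparkExecutionContext comes from the example itself.

import co.cask.cdap.api.spark.SparkExecutionContext; // package assumed; check the CDAP module you build against

public final class ExecutionContextUsageSketch {

    // Only the SparkMainWrapper (per the Javadoc) should pass true: the first
    // call in the driver process lazily creates and caches the context.
    static SparkExecutionContext bootstrap(SparkClassLoader loader) {
        return loader.getSparkExecutionContext(true);
    }

    // Every other driver-side caller passes false and receives the cached
    // instance; in an executor process this throws the IllegalStateException
    // shown above, because no context is ever created there.
    static SparkExecutionContext lookup(SparkClassLoader loader) {
        return loader.getSparkExecutionContext(false);
    }
}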

Example 82 with SparkConf

use of org.apache.spark.SparkConf in project cdap by caskdata.

the class SparkRuntimeService method createSubmitConfigs.

/**
   * Creates the configurations for the spark submitter.
   */
private Map<String, String> createSubmitConfigs(File localDir, String metricsConfPath, String classpath, Map<String, LocalizeResource> localizedResources, boolean localMode) throws Exception {
    Map<String, String> configs = new HashMap<>();
    // Make the Spark UI run on a random port. By default, the Spark UI runs on port 4040 and does a
    // sequential search for the next free port if 4040 is already occupied. During that search, however,
    // it unnecessarily logs a big stack trace at WARN level, which pollutes the logs when concurrent
    // Spark jobs are running (e.g. a fork in a Workflow).
    configs.put("spark.ui.port", "0");
    // Set up configs from the default Spark conf
    Properties sparkDefaultConf = SparkPackageUtils.getSparkDefaultConf();
    configs.putAll(Maps.fromProperties(sparkDefaultConf));
    // Set up app.id and executor.id for the metrics system
    configs.put("spark.app.id", context.getApplicationSpecification().getName());
    configs.put("spark.executor.id", context.getApplicationSpecification().getName());
    // Set up the resource requirements for the driver and executors. The user can override them via the SparkConf.
    configs.put("spark.driver.memory", context.getDriverResources().getMemoryMB() + "m");
    configs.put("spark.driver.cores", String.valueOf(context.getDriverResources().getVirtualCores()));
    configs.put("spark.executor.memory", context.getExecutorResources().getMemoryMB() + "m");
    configs.put("spark.executor.cores", String.valueOf(context.getExecutorResources().getVirtualCores()));
    // Add user-specified configs first. CDAP-specific configs will override them later if there are duplicates.
    SparkConf sparkConf = context.getSparkConf();
    if (sparkConf != null) {
        for (Tuple2<String, String> tuple : sparkConf.getAll()) {
            configs.put(tuple._1(), tuple._2());
        }
    }
    // In distributed mode, the driver and executors run in separate JVMs, so we are adding these jars to their classpath.
    if (!localMode) {
        String extraClassPath = Paths.get("$PWD", CDAP_LAUNCHER_JAR) + File.pathSeparator + classpath;
        // Set the extraClassPath configs, keeping any user-specified extra classpath after the CDAP entries
        prependConfig(configs, "spark.driver.extraClassPath", extraClassPath, File.pathSeparator);
        prependConfig(configs, "spark.executor.extraClassPath", extraClassPath, File.pathSeparator);
    } else {
        // Only need to set this for local mode.
        // In distributed mode, Spark will not use this but instead use the yarn container directory.
        configs.put("spark.local.dir", localDir.getAbsolutePath());
    }
    configs.put("spark.metrics.conf", metricsConfPath);
    SparkRuntimeUtils.setLocalizedResources(localizedResources.keySet(), configs);
    return configs;
}
Also used : HashMap(java.util.HashMap) Properties(java.util.Properties) SparkConf(org.apache.spark.SparkConf)
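
The prependConfig helper called above is not part of this listing. As a minimal sketch of what it plausibly does, assuming it simply places the new value ahead of any existing value for the key, joined by the given separator (the actual CDAP implementation may differ):

// Assumes java.util.Map is imported, as in the surrounding class.
private static void prependConfig(Map<String, String> configs, String key,
                                  String prepend, String separator) {
    String existing = configs.get(key);
    // Keep whatever is already configured (e.g. from spark-defaults.conf or the
    // user's SparkConf), but put the new entries in front of it.
    configs.put(key, existing == null ? prepend : prepend + separator + existing);
}

With this behavior, the CDAP launcher jar ends up first on spark.driver.extraClassPath and spark.executor.extraClassPath, while anything the user set is preserved after it.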

Example 83 with SparkConf

use of org.apache.spark.SparkConf in project cdap by caskdata.

the class SparkRuntimeUtilsTest method testCreateConfArchive.

@Test
public void testCreateConfArchive() throws IOException {
    File confDir = TEMP_FOLDER.newFolder();
    File confFile = new File(confDir, "testing.conf");
    Files.write("Testing Message", confFile, Charsets.UTF_8);
    SparkConf conf = new SparkConf();
    conf.set("testing", "value");
    File archiveFile = SparkRuntimeUtils.createConfArchive(conf, "test.properties", confDir.getAbsolutePath(), TEMP_FOLDER.newFile().getAbsolutePath());
    try (ZipFile zipFile = new ZipFile(archiveFile)) {
        Properties properties = new Properties();
        try (InputStream is = zipFile.getInputStream(zipFile.getEntry("test.properties"))) {
            properties.load(is);
            Assert.assertEquals("value", properties.getProperty("testing"));
        }
        try (InputStream is = zipFile.getInputStream(zipFile.getEntry("testing.conf"))) {
            Assert.assertEquals("Testing Message", Bytes.toString(ByteStreams.toByteArray(is)));
        }
    }
}
Also used : ZipFile(java.util.zip.ZipFile) InputStream(java.io.InputStream) Properties(java.util.Properties) File(java.io.File) ZipFile(java.util.zip.ZipFile) SparkConf(org.apache.spark.SparkConf) Test(org.junit.Test)
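
SparkRuntimeUtils.createConfArchive itself is not shown in this listing. Going only by what the test asserts, namely that the archive contains the SparkConf entries under the given properties-file name plus every file from the conf directory, a simplified sketch of the same idea could look as follows; the real CDAP implementation may differ:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Properties;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.spark.SparkConf;
import scala.Tuple2;

public final class ConfArchiveSketch {

    // Zips the SparkConf (as a properties entry) together with every regular
    // file in confDir, mirroring the behavior the test above verifies.
    static File createConfArchive(SparkConf conf, String propertiesEntryName,
                                  String confDirPath, String outputPath) throws IOException {
        Properties properties = new Properties();
        for (Tuple2<String, String> entry : conf.getAll()) {
            properties.setProperty(entry._1(), entry._2());
        }
        File output = new File(outputPath);
        try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(output.toPath()))) {
            // Write the SparkConf entries as a properties file entry.
            zos.putNextEntry(new ZipEntry(propertiesEntryName));
            properties.store(zos, "SparkConf");
            zos.closeEntry();
            // Copy each file from the conf directory into the archive.
            File[] files = new File(confDirPath).listFiles();
            if (files != null) {
                for (File file : files) {
                    if (file.isFile()) {
                        zos.putNextEntry(new ZipEntry(file.getName()));
                        Files.copy(file.toPath(), zos);
                        zos.closeEntry();
                    }
                }
            }
        }
        return output;
    }
}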

Aggregations

SparkConf (org.apache.spark.SparkConf): 83 usages
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 46 usages
Test (org.junit.Test): 21 usages
ArrayList (java.util.ArrayList): 20 usages
Configuration (org.apache.hadoop.conf.Configuration): 20 usages
Tuple2 (scala.Tuple2): 15 usages
Graph (uk.gov.gchq.gaffer.graph.Graph): 13 usages
DataOutputStream (java.io.DataOutputStream): 11 usages
File (java.io.File): 10 usages
HashSet (java.util.HashSet): 10 usages
ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream): 10 usages
Edge (uk.gov.gchq.gaffer.data.element.Edge): 10 usages
Element (uk.gov.gchq.gaffer.data.element.Element): 10 usages
Entity (uk.gov.gchq.gaffer.data.element.Entity): 10 usages
User (uk.gov.gchq.gaffer.user.User): 10 usages
Ignore (org.junit.Ignore): 6 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 5 usages
JavaHBaseContext (org.apache.hadoop.hbase.spark.JavaHBaseContext): 5 usages
Test (org.testng.annotations.Test): 5 usages
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements): 5 usages