
Example 61 with JavaSparkContext

Use of org.apache.spark.api.java.JavaSparkContext in project incubator-systemml by apache.

From class MLContextUtil, method getJavaSparkContextFromProxy.

/**
 * Obtain the Java Spark Context from the MLContextProxy
 *
 * @return the Java Spark Context
 */
public static JavaSparkContext getJavaSparkContextFromProxy() {
    MLContext activeMLContext = MLContextProxy.getActiveMLContextForAPI();
    JavaSparkContext jsc = getJavaSparkContext(activeMLContext);
    return jsc;
}
Also used : JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)
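For context, a minimal self-contained sketch of creating and using a JavaSparkContext outside SystemML; the app name and data are illustrative and not part of the project code above.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaSparkContextQuickstart {
    public static void main(String[] args) {
        // Local master for demonstration; in a cluster the master comes from spark-submit.
        SparkConf conf = new SparkConf().setAppName("quickstart").setMaster("local[*]");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        try {
            // Distribute a small collection and run a trivial job on it.
            JavaRDD<Integer> nums = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
            long evens = nums.filter(n -> n % 2 == 0).count();
            System.out.println("even count = " + evens);
        } finally {
            jsc.stop();
        }
    }
}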

Example 62 with JavaSparkContext

Use of org.apache.spark.api.java.JavaSparkContext in project incubator-systemml by apache.

From class SparkExecutionContext, method initSparkContext.

private static synchronized void initSparkContext() {
    //check for redundant spark context init
    if (_spctx != null)
        return;
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    //create a default spark context (master, appname, etc refer to system properties
    //as given in the spark configuration or during spark-submit)
    MLContext mlCtxObj = MLContextProxy.getActiveMLContext();
    if (mlCtxObj != null) {
        // This is the case when DML is called through the Spark shell
        // The passing of static variables will be cleaned up later, as this involves minimal change to DMLScript
        _spctx = MLContextUtil.getJavaSparkContext(mlCtxObj);
    } else {
        if (DMLScript.USE_LOCAL_SPARK_CONFIG) {
            // Use a local master for integration testing (local[*] uses all available cores)
            SparkConf conf = createSystemMLSparkConf().setMaster("local[*]").setAppName("My local integration test app");
            // Multiple contexts are discouraged in Spark; enabled only for test cases that cannot stop the context properly
            // conf.set("spark.driver.allowMultipleContexts", "true");
            conf.set("spark.ui.enabled", "false");
            _spctx = new JavaSparkContext(conf);
        } else { //default cluster setup
            //setup systemml-preferred spark configuration (w/o user choice)
            SparkConf conf = createSystemMLSparkConf();
            _spctx = new JavaSparkContext(conf);
        }
        _parRDDs.clear();
    }
    // Warn if spark.driver.maxResultSize (default 1g) is smaller than the local memory budget. It needs to be set before starting the Spark context for CP collect
    String strDriverMaxResSize = _spctx.getConf().get("spark.driver.maxResultSize", "1g");
    long driverMaxResSize = UtilFunctions.parseMemorySize(strDriverMaxResSize);
    if (driverMaxResSize != 0 && driverMaxResSize < OptimizerUtils.getLocalMemBudget() && !DMLScript.USE_LOCAL_SPARK_CONFIG)
        LOG.warn("Configuration parameter spark.driver.maxResultSize set to " + UtilFunctions.formatMemorySize(driverMaxResSize) + "." + " You can set it through Spark default configuration setting either to 0 (unlimited) or to available memory budget of size " + UtilFunctions.formatMemorySize((long) OptimizerUtils.getLocalMemBudget()) + ".");
    //TODO if spark context passed in from outside (mlcontext), we need to clean this up at the end
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(_spctx.hadoopConfiguration());
    //statistics maintenance
    if (DMLScript.STATISTICS) {
        Statistics.setSparkCtxCreateTime(System.nanoTime() - t0);
    }
}
Also used : JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf) MLContext(org.apache.sysml.api.mlcontext.MLContext)
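Relatedly, the warning above can be avoided by configuring spark.driver.maxResultSize before the context is created. A minimal sketch, independent of SystemML; the property value and app name are illustrative.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class MaxResultSizeSetup {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            .setAppName("max-result-size-demo")
            .setMaster("local[*]")
            // 0 means unlimited; alternatively size it to the driver's available memory budget, e.g. "2g".
            .set("spark.driver.maxResultSize", "0");
        // The property only takes effect if it is set before the context starts.
        JavaSparkContext jsc = new JavaSparkContext(conf);
        try {
            // ... run jobs whose results are collected to the driver ...
        } finally {
            jsc.stop();
        }
    }
}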

Example 63 with JavaSparkContext

Use of org.apache.spark.api.java.JavaSparkContext in project ignite by apache.

From class JavaStandaloneIgniteRDDSelfTest, method testAllFieldsTypes.

/**
 * @throws Exception If failed.
 */
public void testAllFieldsTypes() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
    final int cnt = 100;
    try {
        JavaIgniteContext<String, EntityTestAllTypeFields> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
        JavaIgniteRDD<String, EntityTestAllTypeFields> cache = ic.fromCache(ENTITY_ALL_TYPES_CACHE_NAME);
        cache.savePairs(sc.parallelize(F.range(0, cnt), 2).mapToPair(INT_TO_ENTITY_ALL_FIELDS_F));
        EntityTestAllTypeFields e = new EntityTestAllTypeFields(cnt / 2);
        for (Field f : EntityTestAllTypeFields.class.getDeclaredFields()) {
            String fieldName = f.getName();
            Object val = GridTestUtils.getFieldValue(e, fieldName);
            Dataset<Row> df = cache.sql(String.format("select %s from EntityTestAllTypeFields where %s = ?", fieldName, fieldName), val);
            if (val instanceof BigDecimal) {
                Object res = ((Row[]) df.collect())[0].get(0);
                assertTrue(String.format("+++ Fail on %s field", fieldName), ((Comparable<BigDecimal>) val).compareTo((BigDecimal) res) == 0);
            } else if (val instanceof java.sql.Date)
                assertEquals(String.format("+++ Fail on %s field", fieldName), val.toString(), ((Row[]) df.collect())[0].get(0).toString());
            else if (val.getClass().isArray())
                assertTrue(String.format("+++ Fail on %s field", fieldName), 1 <= df.count());
            else {
                assertTrue(String.format("+++ Fail on %s field", fieldName), ((Row[]) df.collect()).length > 0);
                assertTrue(String.format("+++ Fail on %s field", fieldName), ((Row[]) df.collect())[0].size() > 0);
                assertEquals(String.format("+++ Fail on %s field", fieldName), val, ((Row[]) df.collect())[0].get(0));
            }
            info(String.format("+++ Query on the filed: %s : %s passed", fieldName, f.getType().getSimpleName()));
        }
    } finally {
        sc.stop();
    }
}
Also used : BigDecimal(java.math.BigDecimal) Field(java.lang.reflect.Field) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Row(org.apache.spark.sql.Row)
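The distinctive part of this example is the parameterized SQL query against the Ignite-backed RDD. A stripped-down sketch of that pattern, assuming a JavaIgniteRDD<String, EntityTestAllTypeFields> named cache that is already populated as in the test; the field name id is illustrative.

// Bind the query parameter positionally, as in the test above.
Dataset<Row> df = cache.sql("select id from EntityTestAllTypeFields where id = ?", 42);
// Dataset.collect() is untyped when called from Java, hence the cast used throughout the test.
Row[] rows = (Row[]) df.collect();
if (rows.length > 0)
    System.out.println("matched value: " + rows[0].get(0));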

Example 64 with JavaSparkContext

Use of org.apache.spark.api.java.JavaSparkContext in project ignite by apache.

From class JavaStandaloneIgniteRDDSelfTest, method testReadDataFromIgnite.

/**
 * @throws Exception If failed.
 */
public void testReadDataFromIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
    try {
        JavaIgniteContext<String, Integer> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
        Ignite ignite = Ignition.ignite("grid-0");
        IgniteCache<String, Integer> cache = ignite.cache(ENTITY_CACHE_NAME);
        for (int i = 0; i < KEYS_CNT; i++) cache.put(String.valueOf(i), i);
        JavaRDD<Integer> values = ic.fromCache(ENTITY_CACHE_NAME).map(STR_INT_PAIR_TO_INT_F);
        int sum = values.fold(0, SUM_F);
        int expSum = (KEYS_CNT * KEYS_CNT + KEYS_CNT) / 2 - KEYS_CNT;
        assertEquals(expSum, sum);
    } finally {
        sc.stop();
    }
}
Also used : Ignite(org.apache.ignite.Ignite) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)
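Note on the expected value: the test inserts the values 0 through KEYS_CNT - 1, so the expected sum is 0 + 1 + ... + (KEYS_CNT - 1) = KEYS_CNT * (KEYS_CNT - 1) / 2, which the code writes equivalently as (KEYS_CNT * KEYS_CNT + KEYS_CNT) / 2 - KEYS_CNT.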

Example 65 with JavaSparkContext

Use of org.apache.spark.api.java.JavaSparkContext in project ignite by apache.

From class JavaEmbeddedIgniteRDDSelfTest, method testStoreDataToIgnite.

/**
 * @throws Exception If failed.
 */
public void testStoreDataToIgnite() throws Exception {
    JavaSparkContext sc = createContext();
    JavaIgniteContext<String, String> ic = null;
    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
        ic.fromCache(PARTITIONED_CACHE_NAME).savePairs(sc.parallelize(F.range(0, KEYS_CNT), GRID_CNT).mapToPair(TO_PAIR_F), true);
        Ignite ignite = ic.ignite();
        IgniteCache<String, String> cache = ignite.cache(PARTITIONED_CACHE_NAME);
        for (int i = 0; i < KEYS_CNT; i++) {
            String val = cache.get(String.valueOf(i));
            assertNotNull("Value was not put to cache for key: " + i, val);
            assertEquals("Invalid value stored for key: " + i, "val" + i, val);
        }
    } finally {
        if (ic != null)
            ic.close(true);
        sc.stop();
    }
}
Also used : Ignite(org.apache.ignite.Ignite) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)
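Taken together, the last two examples form a store-then-read round trip. A condensed sketch of that flow, reusing the test fixtures above (IgniteConfigProvider, PARTITIONED_CACHE_NAME, TO_PAIR_F, KEYS_CNT), and therefore illustrative rather than standalone.

JavaSparkContext sc = new JavaSparkContext("local[*]", "ignite-round-trip");
JavaIgniteContext<String, String> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
try {
    // Write key/value pairs produced by a Spark RDD into the partitioned Ignite cache.
    ic.fromCache(PARTITIONED_CACHE_NAME)
        .savePairs(sc.parallelize(F.range(0, KEYS_CNT), 2).mapToPair(TO_PAIR_F), true);
    // Read the same cache back as a Spark RDD and verify the element count.
    long stored = ic.fromCache(PARTITIONED_CACHE_NAME).count();
    assertEquals(KEYS_CNT, stored);
} finally {
    ic.close(true);
    sc.stop();
}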

Aggregations

JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 251
Test (org.testng.annotations.Test): 65
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 64
Tuple2 (scala.Tuple2): 48
SparkConf (org.apache.spark.SparkConf): 46
Test (org.junit.Test): 43
ArrayList (java.util.ArrayList): 41
GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 32
List (java.util.List): 26
Configuration (org.apache.hadoop.conf.Configuration): 23
JavaRDD (org.apache.spark.api.java.JavaRDD): 23
File (java.io.File): 22
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 20
Collectors (java.util.stream.Collectors): 16
TextPipeline (org.deeplearning4j.spark.text.functions.TextPipeline): 15
DataSet (org.nd4j.linalg.dataset.DataSet): 15
IOException (java.io.IOException): 13
SAMFileHeader (htsjdk.samtools.SAMFileHeader): 12
RealMatrix (org.apache.commons.math3.linear.RealMatrix): 12
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 11