
Example 26 with SparkConf

use of org.apache.spark.SparkConf in project Gaffer by gchq.

the class ImportRDDOfElementsHandlerTest method checkImportRDDOfElements.

@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
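        // $plus$eq is Scala's += operator as seen from Java; it appends to the ArrayBuffer.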
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final RDD<Element> elementRDD = sparkContext.parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .sparkContext(sparkContext)
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .sparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) SparkContext(org.apache.spark.SparkContext) ImportRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.ImportRDDOfElements) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) ArrayBuffer(scala.collection.mutable.ArrayBuffer) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)
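
The trick of serialising the Hadoop Configuration to a string, used above to pass it through an operation option, is worth isolating. A minimal round-trip sketch (the class and method names here are illustrative, not part of Gaffer; Configuration implements Hadoop's Writable, which is what makes this work):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;

public final class ConfigurationStringDemo {

    // Serialise a Configuration to a UTF-8 string, as the test does before
    // handing it to the operation as an option value.
    static String toConfigurationString(final Configuration conf) throws IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        conf.write(new DataOutputStream(baos));
        return new String(baos.toByteArray(), StandardCharsets.UTF_8);
    }

    // Reverse the process on the receiving side.
    static Configuration fromConfigurationString(final String s) throws IOException {
        final Configuration conf = new Configuration();
        conf.readFields(new DataInputStream(
                new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8))));
        return conf;
    }
}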

Example 27 with SparkConf

use of org.apache.spark.SparkConf in project Gaffer by gchq.

the class GetJavaRDDOfElementsExample method runExamples.

@Override
public void runExamples() {
    // Logging must be switched off and back on explicitly, as Spark emits some
    // log output even when the log level is set to OFF.
    ROOT_LOGGER.setLevel(Level.OFF);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("GetJavaRDDOfElementsExample")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sc = new JavaSparkContext(sparkConf);
    sc.setLogLevel("OFF");
    final Graph graph = getGraph();
    try {
        getJavaRddOfElements(sc, graph);
        getJavaRddOfElementsReturningEdgesOnly(sc, graph);
    } catch (final OperationException e) {
        throw new RuntimeException(e);
    }
    sc.stop();
    ROOT_LOGGER.setLevel(Level.INFO);
}
Also used : Graph(uk.gov.gchq.gaffer.graph.Graph) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf) OperationException(uk.gov.gchq.gaffer.operation.OperationException)
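
The quiet-then-restore logging pattern above can be made exception-safe with try/finally, so the root logger level is restored even if the Spark work throws. A sketch under those assumptions (the job body is a placeholder):

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public final class QuietSparkDemo {

    private static final Logger ROOT_LOGGER = Logger.getRootLogger();

    public static void main(final String[] args) {
        final Level previous = ROOT_LOGGER.getLevel();
        ROOT_LOGGER.setLevel(Level.OFF);
        final JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setMaster("local").setAppName("quiet"));
        sc.setLogLevel("OFF");
        try {
            // ... run the Spark work here ...
        } finally {
            sc.stop();
            // Restore the previous level, falling back to INFO if none was set.
            ROOT_LOGGER.setLevel(previous == null ? Level.INFO : previous);
        }
    }
}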

Example 28 with SparkConf

use of org.apache.spark.SparkConf in project azure-tools-for-java by Microsoft.

the class JavaSparkPi method main.

public static void main(String[] args) throws Exception {
    // Use the following line instead to run the application on a cluster:
    // SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi");
    SparkConf sparkConf = new SparkConf().setAppName("JavaSparkPi").setMaster("local[2]");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
    int n = 100000 * slices;
    List<Integer> l = new ArrayList<Integer>(n);
    for (int i = 0; i < n; i++) {
        l.add(i);
    }
    JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
    int count = dataSet.map(new Function<Integer, Integer>() {

        @Override
        public Integer call(Integer integer) {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return (x * x + y * y < 1) ? 1 : 0;
        }
    }).reduce(new Function2<Integer, Integer, Integer>() {

        @Override
        public Integer call(Integer integer, Integer integer2) {
            return integer + integer2;
        }
    });
    System.out.println("Pi is roughly " + 4.0 * count / n);
    jsc.stop();
}
Also used : Function(org.apache.spark.api.java.function.Function) ArrayList(java.util.ArrayList) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf)
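
Since Function and Function2 are functional interfaces, on Java 8 the two anonymous classes above collapse to lambdas with identical behaviour:

// Java 8 equivalent of the map/reduce pair above.
int count = dataSet
        .map(integer -> {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return (x * x + y * y < 1) ? 1 : 0;
        })
        .reduce(Integer::sum);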

Example 29 with SparkConf

use of org.apache.spark.SparkConf in project geode by apache.

the class OQLJavaDemo method main.

public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: OQLJavaDemo <locators>\n");
        return;
    }
    SparkConf conf = new SparkConf().setAppName("OQLJavaDemo");
    // "192.168.1.47[10335]"
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame df = javaFunctions(sqlContext).geodeOQL("select * from /str_str_region");
    System.out.println("======= DataFrame =======\n");
    df.show();
    sc.stop();
}
Also used : JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) DataFrame(org.apache.spark.sql.DataFrame) SparkConf(org.apache.spark.SparkConf) SQLContext(org.apache.spark.sql.SQLContext)
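
Because geodeOQL returns an ordinary Spark 1.x DataFrame, its result can be combined with the rest of the DataFrame API. A small sketch, reusing the df and sqlContext from above (registerTempTable is the Spark 1.x API this demo targets; the table name is arbitrary):

// Register the OQL result as a temporary table and query it with Spark SQL.
df.registerTempTable("str_str");
DataFrame firstFive = sqlContext.sql("SELECT * FROM str_str LIMIT 5");
firstFive.show();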

Example 30 with SparkConf

use of org.apache.spark.SparkConf in project geode by apache.

the class RDDSaveJavaDemo method main.

public static void main(String[] argv) {
    if (argv.length != 1) {
        System.err.printf("Usage: RDDSaveJavaDemo <locators>\n");
        return;
    }
    SparkConf conf = new SparkConf().setAppName("RDDSaveJavaDemo");
    conf.set(GeodeLocatorPropKey, argv[0]);
    JavaSparkContext sc = new JavaSparkContext(conf);
    List<String> data = new ArrayList<String>();
    data.add("abcdefg");
    data.add("abcdefgh");
    data.add("abcdefghi");
    JavaRDD<String> rdd = sc.parallelize(data);
    GeodeConnectionConf connConf = GeodeConnectionConf.apply(conf);
    PairFunction<String, String, Integer> func = new PairFunction<String, String, Integer>() {

        @Override
        public Tuple2<String, Integer> call(String s) throws Exception {
            return new Tuple2<String, Integer>(s, s.length());
        }
    };
    javaFunctions(rdd).saveToGeode("str_int_region", func, connConf);
    sc.stop();
}
Also used : GeodeConnectionConf(org.apache.geode.spark.connector.GeodeConnectionConf) Tuple2(scala.Tuple2) ArrayList(java.util.ArrayList) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) PairFunction(org.apache.spark.api.java.function.PairFunction) SparkConf(org.apache.spark.SparkConf)
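
PairFunction is also a functional interface, so on Java 8 the anonymous class above reduces to a lambda; a sketch reusing the rdd and connConf from above:

// Pair each string with its length, as the anonymous class does.
PairFunction<String, String, Integer> pairWithLength =
        s -> new Tuple2<>(s, s.length());
javaFunctions(rdd).saveToGeode("str_int_region", pairWithLength, connConf);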

Aggregations

SparkConf (org.apache.spark.SparkConf): 83
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 46
Test (org.junit.Test): 21
ArrayList (java.util.ArrayList): 20
Configuration (org.apache.hadoop.conf.Configuration): 20
Tuple2 (scala.Tuple2): 15
Graph (uk.gov.gchq.gaffer.graph.Graph): 13
DataOutputStream (java.io.DataOutputStream): 11
File (java.io.File): 10
HashSet (java.util.HashSet): 10
ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream): 10
Edge (uk.gov.gchq.gaffer.data.element.Edge): 10
Element (uk.gov.gchq.gaffer.data.element.Element): 10
Entity (uk.gov.gchq.gaffer.data.element.Entity): 10
User (uk.gov.gchq.gaffer.user.User): 10
Ignore (org.junit.Ignore): 6
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 5
JavaHBaseContext (org.apache.hadoop.hbase.spark.JavaHBaseContext): 5
Test (org.testng.annotations.Test): 5
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements): 5