
Example 1 with ReduceFunction

Use of org.apache.spark.api.java.function.ReduceFunction in the project net.jgp.labs.spark by jgperrin.

The start method of the class PiComputeLambdaApp.

/**
 * The processing code.
 */
private void start(int slices) {
    int numberOfThrows = 100000 * slices;
    System.out.println("About to throw " + numberOfThrows + " darts, ready? Stay away from the target!");
    long t0 = System.currentTimeMillis();
    SparkSession spark = SparkSession.builder().appName("Spark Pi with lambdas").master("local[*]").getOrCreate();
    long t1 = System.currentTimeMillis();
    System.out.println("Session initialized in " + (t1 - t0) + " ms");
    List<Integer> l = new ArrayList<>(numberOfThrows);
    for (int i = 0; i < numberOfThrows; i++) {
        l.add(i);
    }
    Dataset<Row> incrementalDf = spark.createDataset(l, Encoders.INT()).toDF();
    long t2 = System.currentTimeMillis();
    System.out.println("Initial dataframe built in " + (t2 - t1) + " ms");
    Dataset<Integer> dotsDs = incrementalDf.map((MapFunction<Row, Integer>) status -> {
        double x = Math.random() * 2 - 1;
        double y = Math.random() * 2 - 1;
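        // Note: 'counter' is a field on the enclosing class (not shown in this snippet); this
        // side-effecting progress count only behaves as expected because the master is local[*].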
        counter++;
        if (counter % 100000 == 0) {
            System.out.println("" + counter + " darts thrown so far");
        }
        return (x * x + y * y <= 1) ? 1 : 0;
    }, Encoders.INT());
    long t3 = System.currentTimeMillis();
    System.out.println("Throwing darts done in " + (t3 - t2) + " ms");
    int dartsInCircle = dotsDs.reduce((ReduceFunction<Integer>) (x, y) -> x + y);
    long t4 = System.currentTimeMillis();
    System.out.println("Analyzing result in " + (t4 - t3) + " ms");
    System.out.println("Pi is roughly " + 4.0 * dartsInCircle / numberOfThrows);
    spark.stop();
}
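
Below is a minimal, self-contained sketch of the same Dataset.reduce(ReduceFunction) pattern used above, detached from the Pi estimation. The class name ReduceFunctionSketchApp and the sample data are illustrative only, not part of the jgperrin project.

import java.util.Arrays;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class ReduceFunctionSketchApp {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("ReduceFunction sketch")
            .master("local[*]")
            .getOrCreate();
        // Build a small Dataset<Integer>, square each element, then sum the squares.
        Dataset<Integer> numbers = spark.createDataset(Arrays.asList(1, 2, 3, 4, 5), Encoders.INT());
        Dataset<Integer> squares = numbers.map((MapFunction<Integer, Integer>) n -> n * n, Encoders.INT());
        int sumOfSquares = squares.reduce((ReduceFunction<Integer>) (a, b) -> a + b);
        // 1 + 4 + 9 + 16 + 25 = 55
        System.out.println("Sum of squares: " + sumOfSquares);
        spark.stop();
    }
}

The explicit (ReduceFunction<Integer>) cast matters in both snippets: Dataset.reduce is overloaded, and the cast selects the Java functional interface overload rather than the Scala Function2 one. The same reasoning applies to the (MapFunction<...>) cast passed to Dataset.map.
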
Also used : List (java.util.List), Dataset (org.apache.spark.sql.Dataset), Row (org.apache.spark.sql.Row), ReduceFunction (org.apache.spark.api.java.function.ReduceFunction), MapFunction (org.apache.spark.api.java.function.MapFunction), Encoders (org.apache.spark.sql.Encoders), Serializable (java.io.Serializable), ArrayList (java.util.ArrayList), SparkSession (org.apache.spark.sql.SparkSession)

Aggregations

Serializable (java.io.Serializable) 1
ArrayList (java.util.ArrayList) 1
List (java.util.List) 1
MapFunction (org.apache.spark.api.java.function.MapFunction) 1
ReduceFunction (org.apache.spark.api.java.function.ReduceFunction) 1
Dataset (org.apache.spark.sql.Dataset) 1
Encoders (org.apache.spark.sql.Encoders) 1
Row (org.apache.spark.sql.Row) 1
SparkSession (org.apache.spark.sql.SparkSession) 1