Use of org.apache.spark.api.java.function.ReduceFunction in the project net.jgp.labs.spark by jgperrin.
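ReduceFunction is the Java-friendly functional interface accepted by Dataset#reduce; the explicit cast seen in the excerpt below is what makes the compiler pick that overload rather than the Scala Function2 one. As a point of reference, a minimal self-contained sketch of the same pattern (the class name and sample values are illustrative, not taken from the project) could look like this:

import java.util.Arrays;

import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class ReduceFunctionSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("ReduceFunction sketch")
        .master("local[*]")
        .getOrCreate();

    Dataset<Integer> ds =
        spark.createDataset(Arrays.asList(1, 2, 3, 4), Encoders.INT());

    // The cast selects the reduce(ReduceFunction) overload of Dataset#reduce;
    // the lambda sums all elements, so sum is 10 in this example.
    Integer sum = ds.reduce((ReduceFunction<Integer>) (a, b) -> a + b);
    System.out.println("sum = " + sum);

    spark.stop();
  }
}

The reduce call is an action: it triggers the evaluation of the Dataset and returns a plain value to the driver.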
The class PiComputeLambdaApp, method start.
/**
 * The processing code.
 */
private void start(int slices) {
  int numberOfThrows = 100000 * slices;
  System.out.println("About to throw " + numberOfThrows
      + " darts, ready? Stay away from the target!");

  long t0 = System.currentTimeMillis();
  SparkSession spark = SparkSession.builder()
      .appName("Spark Pi with lambdas")
      .master("local[*]")
      .getOrCreate();
  long t1 = System.currentTimeMillis();
  System.out.println("Session initialized in " + (t1 - t0) + " ms");

  // Build a dataframe with one row per dart throw
  List<Integer> l = new ArrayList<>(numberOfThrows);
  for (int i = 0; i < numberOfThrows; i++) {
    l.add(i);
  }
  Dataset<Row> incrementalDf = spark.createDataset(l, Encoders.INT()).toDF();
  long t2 = System.currentTimeMillis();
  System.out.println("Initial dataframe built in " + (t2 - t1) + " ms");

  // Map each row to 1 if a random dart lands inside the unit circle, 0 otherwise.
  // counter is a field of PiComputeLambdaApp used for progress reporting; updates
  // to it are only visible like this in local mode, where tasks run in the driver JVM.
  Dataset<Integer> dotsDs = incrementalDf.map((MapFunction<Row, Integer>) status -> {
    double x = Math.random() * 2 - 1;
    double y = Math.random() * 2 - 1;
    counter++;
    if (counter % 100000 == 0) {
      System.out.println("" + counter + " darts thrown so far");
    }
    return (x * x + y * y <= 1) ? 1 : 0;
  }, Encoders.INT());
  long t3 = System.currentTimeMillis();
  System.out.println("Throwing darts done in " + (t3 - t2) + " ms");

  // The reduce action triggers the (lazy) map above and sums the hits; the cast
  // selects the Java-friendly reduce(ReduceFunction) overload of Dataset#reduce.
  int dartsInCircle = dotsDs.reduce((ReduceFunction<Integer>) (x, y) -> x + y);
  long t4 = System.currentTimeMillis();
  System.out.println("Analyzing result in " + (t4 - t3) + " ms");

  // The ratio of hits to throws approximates pi/4
  System.out.println("Pi is roughly " + 4.0 * dartsInCircle / numberOfThrows);

  spark.stop();
}
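The method above refers to a counter that is not declared locally; in the project it is a field of PiComputeLambdaApp. A rough skeleton of the surrounding class, with the imports the method needs and an entry point, might look like the following (the field declaration and the number of slices passed to start are assumptions for illustration, not copied from the project):

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class PiComputeLambdaApp {
  // Assumed declaration of the progress counter used in start(); incrementing it
  // from the map lambda is only reliable in local mode, where tasks share the driver JVM.
  private static long counter = 0;

  public static void main(String[] args) {
    // Illustrative invocation: 10 slices -> 1,000,000 throws.
    PiComputeLambdaApp app = new PiComputeLambdaApp();
    app.start(10);
  }

  private void start(int slices) {
    // ... body as listed above ...
  }
}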