Use of org.apache.spark.api.java.function.MapFunction in project net.jgp.labs.spark by jgperrin.
From the class PiComputeLambdaApp, method start:
/**
 * The processing code.
 */
private void start(int slices) {
  int numberOfThrows = 100000 * slices;
  System.out.println("About to throw " + numberOfThrows
      + " darts, ready? Stay away from the target!");

  long t0 = System.currentTimeMillis();
  SparkSession spark = SparkSession.builder()
      .appName("Spark Pi with lambdas")
      .master("local[*]")
      .getOrCreate();
  long t1 = System.currentTimeMillis();
  System.out.println("Session initialized in " + (t1 - t0) + " ms");

  // Build a dataframe with one row per dart throw.
  List<Integer> l = new ArrayList<>(numberOfThrows);
  for (int i = 0; i < numberOfThrows; i++) {
    l.add(i);
  }
  Dataset<Row> incrementalDf = spark.createDataset(l, Encoders.INT()).toDF();
  long t2 = System.currentTimeMillis();
  System.out.println("Initial dataframe built in " + (t2 - t1) + " ms");

  // Throw one dart per row: map each row to 1 if the dart lands in the
  // unit circle, 0 otherwise. 'counter' is an instance field of
  // PiComputeLambdaApp, used only for progress reporting.
  Dataset<Integer> dotsDs = incrementalDf.map((MapFunction<Row, Integer>) status -> {
    double x = Math.random() * 2 - 1;
    double y = Math.random() * 2 - 1;
    counter++;
    if (counter % 100000 == 0) {
      System.out.println("" + counter + " darts thrown so far");
    }
    return (x * x + y * y <= 1) ? 1 : 0;
  }, Encoders.INT());
  long t3 = System.currentTimeMillis();
  System.out.println("Throwing darts done in " + (t3 - t2) + " ms");

  int dartsInCircle = dotsDs.reduce((ReduceFunction<Integer>) (x, y) -> x + y);
  long t4 = System.currentTimeMillis();
  System.out.println("Analyzing result in " + (t4 - t3) + " ms");
  System.out.println("Pi is roughly " + 4.0 * dartsInCircle / numberOfThrows);

  spark.stop();
}
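The cast to MapFunction<Row, Integer> above is what disambiguates the lambda for the Java compiler: Dataset#map is overloaded to accept both a Scala function and a Java MapFunction, and both overloads take an Encoder, so a bare lambda is ambiguous. A minimal, self-contained sketch of the same pattern (the class name and sample data are illustrative, not from the project above):

import java.util.Arrays;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class MapFunctionSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("MapFunction sketch")
        .master("local[*]")
        .getOrCreate();

    Dataset<Integer> squares = spark
        .createDataset(Arrays.asList(1, 2, 3, 4), Encoders.INT())
        // The cast selects the MapFunction overload of map();
        // without it the lambda would be ambiguous in Java.
        .map((MapFunction<Integer, Integer>) i -> i * i, Encoders.INT());

    int sum = squares.reduce((ReduceFunction<Integer>) (a, b) -> a + b);
    System.out.println("Sum of squares: " + sum); // prints 30
    spark.stop();
  }
}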
Use of org.apache.spark.api.java.function.MapFunction in project beam by apache.
From the class ParDoTranslatorBatch, method translateTransform:
@Override
public void translateTransform(
    PTransform<PCollection<InputT>, PCollectionTuple> transform,
    AbstractTranslationContext context) {
  String stepName = context.getCurrentTransform().getFullName();

  // Check for unsupported advanced features.
  // TODO: add support of Splittable DoFn
  DoFn<InputT, OutputT> doFn = getDoFn(context);
  checkState(
      !DoFnSignatures.isSplittable(doFn),
      "Not expected to directly translate splittable DoFn, should have been overridden: %s",
      doFn);
  // TODO: add support of states and timers
  checkState(!DoFnSignatures.isStateful(doFn),
      "States and timers are not supported for the moment.");
  checkState(!DoFnSignatures.requiresTimeSortedInput(doFn),
      "@RequiresTimeSortedInput is not supported for the moment");

  DoFnSchemaInformation doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());

  // Init main variables.
  PValue input = context.getInput();
  Dataset<WindowedValue<InputT>> inputDataSet = context.getDataset(input);
  Map<TupleTag<?>, PCollection<?>> outputs = context.getOutputs();
  TupleTag<?> mainOutputTag = getTupleTag(context);
  List<TupleTag<?>> outputTags = new ArrayList<>(outputs.keySet());
  WindowingStrategy<?, ?> windowingStrategy =
      ((PCollection<InputT>) input).getWindowingStrategy();
  Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder();
  Coder<? extends BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  // Construct a map from side input to WindowingStrategy so that
  // the DoFn runner can map main-input windows to side-input windows.
  List<PCollectionView<?>> sideInputs = getSideInputs(context);
  Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
  for (PCollectionView<?> sideInput : sideInputs) {
    sideInputStrategies.put(sideInput, sideInput.getPCollection().getWindowingStrategy());
  }
  SideInputBroadcast broadcastStateData = createBroadcastSideInputs(sideInputs, context);

  Map<TupleTag<?>, Coder<?>> outputCoderMap = context.getOutputCoders();
  MetricsContainerStepMapAccumulator metricsAccum = MetricsAccumulator.getInstance();

  List<TupleTag<?>> additionalOutputTags = new ArrayList<>();
  for (TupleTag<?> tag : outputTags) {
    if (!tag.equals(mainOutputTag)) {
      additionalOutputTags.add(tag);
    }
  }

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());

  @SuppressWarnings("unchecked")
  DoFnFunction<InputT, OutputT> doFnWrapper =
      new DoFnFunction(
          metricsAccum,
          stepName,
          doFn,
          windowingStrategy,
          sideInputStrategies,
          context.getSerializableOptions(),
          additionalOutputTags,
          mainOutputTag,
          inputCoder,
          outputCoderMap,
          broadcastStateData,
          doFnSchemaInformation,
          sideInputMapping);

  // Run the DoFn over each partition; every emitted element is tagged with
  // the output it belongs to.
  MultiOutputCoder multipleOutputCoder =
      MultiOutputCoder.of(SerializableCoder.of(TupleTag.class), outputCoderMap, windowCoder);
  Dataset<Tuple2<TupleTag<?>, WindowedValue<?>>> allOutputs =
      inputDataSet.mapPartitions(doFnWrapper, EncoderHelpers.fromBeamCoder(multipleOutputCoder));

  if (outputs.entrySet().size() > 1) {
    // Multiple outputs: cache the tagged dataset, then carve out one
    // dataset per output tag.
    allOutputs.persist();
    for (Map.Entry<TupleTag<?>, PCollection<?>> output : outputs.entrySet()) {
      pruneOutputFilteredByTag(context, allOutputs, output, windowCoder);
    }
  } else {
    // Single output: drop the tag and keep only the windowed values.
    Coder<OutputT> outputCoder = ((PCollection<OutputT>) outputs.get(mainOutputTag)).getCoder();
    Coder<WindowedValue<?>> windowedValueCoder =
        (Coder<WindowedValue<?>>) (Coder<?>) WindowedValue.getFullCoder(outputCoder, windowCoder);
    Dataset<WindowedValue<?>> outputDataset =
        allOutputs.map(
            (MapFunction<Tuple2<TupleTag<?>, WindowedValue<?>>, WindowedValue<?>>)
                value -> value._2,
            EncoderHelpers.fromBeamCoder(windowedValueCoder));
    context.putDatasetWildcard(outputs.entrySet().iterator().next().getValue(), outputDataset);
  }
}
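In the single-output branch, MapFunction does nothing more than project the windowed value out of the (tag, value) tuple. A stripped-down sketch of that projection with plain Spark types, leaving out Beam's coders and encoders (the class name and sample data are illustrative):

import java.util.Arrays;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

public class TupleProjectionSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("Tuple projection sketch")
        .master("local[*]")
        .getOrCreate();

    Dataset<Tuple2<String, Long>> tagged = spark.createDataset(
        Arrays.asList(new Tuple2<>("main", 1L), new Tuple2<>("main", 2L)),
        Encoders.tuple(Encoders.STRING(), Encoders.LONG()));

    // Same shape as the translator's single-output branch: drop the tag,
    // keep only the payload (here a Long instead of a WindowedValue).
    Dataset<Long> values = tagged.map(
        (MapFunction<Tuple2<String, Long>, Long>) pair -> pair._2,
        Encoders.LONG());

    values.show();
    spark.stop();
  }
}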
Use of org.apache.spark.api.java.function.MapFunction in project beam by apache.
From the class WindowingHelpers, method assignWindowsMapFunction:
public static <T, W extends BoundedWindow>
    MapFunction<WindowedValue<T>, WindowedValue<T>> assignWindowsMapFunction(
        WindowFn<T, W> windowFn) {
  return (MapFunction<WindowedValue<T>, WindowedValue<T>>) windowedValue -> {
    final BoundedWindow boundedWindow = Iterables.getOnlyElement(windowedValue.getWindows());
    final T element = windowedValue.getValue();
    final Instant timestamp = windowedValue.getTimestamp();
    // Ask the WindowFn to (re)assign windows for this element, exposing the
    // element, its timestamp, and its current window through an AssignContext.
    Collection<W> windows =
        windowFn.assignWindows(
            windowFn.new AssignContext() {

              @Override
              public T element() {
                return element;
              }

              @Override
              public Instant timestamp() {
                return timestamp;
              }

              @Override
              public BoundedWindow window() {
                return boundedWindow;
              }
            });
    return WindowedValue.of(element, timestamp, windows, windowedValue.getPane());
  };
}
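A helper like this is meant to be handed straight to Dataset#map together with an encoder for the windowed values; in this runner, such encoders come from EncoderHelpers.fromBeamCoder, as in the ParDo translation above. A hedged sketch of such a call site; the assignWindows wrapper and its parameter names are assumptions for illustration, not part of the Beam source, and WindowingHelpers and EncoderHelpers are referenced unqualified as in the snippets above:

import java.util.Collection;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.spark.sql.Dataset;

// Hypothetical call site: reassign windows on a dataset of windowed values.
static <T, W extends BoundedWindow> Dataset<WindowedValue<T>> assignWindows(
    Dataset<WindowedValue<T>> input,
    WindowFn<T, W> windowFn,
    Coder<WindowedValue<T>> windowedValueCoder) {
  return input.map(
      WindowingHelpers.assignWindowsMapFunction(windowFn),
      EncoderHelpers.fromBeamCoder(windowedValueCoder));
}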