Search in sources :

Example 1 with SparkCombineFn

Use of org.apache.beam.runners.spark.translation.SparkCombineFn in the Apache Beam project.

The method extractOutput of the class SparkCompat.

/**
 * Extracts the output for a given collection of WindowedAccumulators.
 *
 * <p>{@code JavaPairRDD.flatMapValues} is called through reflection because its parameter type
 * differs among Spark versions. See https://issues.apache.org/jira/browse/SPARK-19287
 *
 * <p>When the Spark version string starts with {@code "3"}, a {@code FlatMapFunction} producing
 * an iterator is passed; otherwise a {@code Function} producing a collected list is passed.
 *
 * @param accumulatePerKey pair RDD whose values are the windowed accumulators to extract from
 * @param sparkCombineFn combine function whose {@code extractOutputStream} is applied to each
 *     accumulator
 * @return the pair RDD produced by {@code flatMapValues}
 * @throws RuntimeException if the reflective lookup or invocation of {@code flatMapValues} fails
 */
public static <K, InputT, AccumT, OutputT> JavaPairRDD<K, WindowedValue<OutputT>> extractOutput(JavaPairRDD<K, SparkCombineFn.WindowedAccumulator<KV<K, InputT>, InputT, AccumT, ?>> accumulatePerKey, SparkCombineFn<KV<K, InputT>, InputT, AccumT, OutputT> sparkCombineFn) {
    try {
        if (accumulatePerKey.context().version().startsWith("3")) {
            // Spark 3 signature: flatMapValues(FlatMapFunction<V, U>), which yields an Iterator.
            FlatMapFunction<SparkCombineFn.WindowedAccumulator<KV<K, InputT>, InputT, AccumT, ?>, WindowedValue<OutputT>> toIterator = windowedAccumulator -> sparkCombineFn.extractOutputStream(windowedAccumulator).iterator();
            return invokeFlatMapValues(accumulatePerKey, FlatMapFunction.class, toIterator);
        }
        // Other versions: flatMapValues(Function<V, Iterable<U>>), which yields an Iterable.
        Function<SparkCombineFn.WindowedAccumulator<KV<K, InputT>, InputT, AccumT, ?>, Iterable<WindowedValue<OutputT>>> toIterable = windowedAccumulator -> sparkCombineFn.extractOutputStream(windowedAccumulator).collect(Collectors.toList());
        return invokeFlatMapValues(accumulatePerKey, Function.class, toIterable);
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException("Error invoking Spark flatMapValues", e);
    }
}

/**
 * Invokes by reflection the equivalent of {@code pairRdd.flatMapValues(function)}, selecting the
 * overload whose single parameter has the given type.
 */
@SuppressWarnings("unchecked") // The reflective result can only be cast unchecked; callers supply the matching function type.
private static <K, OutputT> JavaPairRDD<K, WindowedValue<OutputT>> invokeFlatMapValues(JavaPairRDD<K, ?> pairRdd, Class<?> functionType, Object function) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException {
    Method flatMapValues = pairRdd.getClass().getDeclaredMethod("flatMapValues", functionType);
    return (JavaPairRDD<K, WindowedValue<OutputT>>) flatMapValues.invoke(pairRdd, function);
}
Also used : SparkListenerApplicationStart(org.apache.spark.scheduler.SparkListenerApplicationStart) SparkCombineFn(org.apache.beam.runners.spark.translation.SparkCombineFn) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) PipelineResult(org.apache.beam.sdk.PipelineResult) ApplicationNameOptions(org.apache.beam.sdk.options.ApplicationNameOptions) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(scala.Option) Constructor(java.lang.reflect.Constructor) Collectors(java.util.stream.Collectors) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) InvocationTargetException(java.lang.reflect.InvocationTargetException) SparkBeamMetric(org.apache.beam.runners.spark.metrics.SparkBeamMetric) List(java.util.List) JavaConverters(scala.collection.JavaConverters) JavaDStream(org.apache.spark.streaming.api.java.JavaDStream) Function(org.apache.spark.api.java.function.Function) Method(java.lang.reflect.Method) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) KV(org.apache.beam.sdk.values.KV) Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) WindowedValue(org.apache.beam.sdk.util.WindowedValue) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) SparkCombineFn(org.apache.beam.runners.spark.translation.SparkCombineFn) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD)

Aggregations

Constructor (java.lang.reflect.Constructor)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 SparkPipelineOptions (org.apache.beam.runners.spark.SparkPipelineOptions)1 SparkBeamMetric (org.apache.beam.runners.spark.metrics.SparkBeamMetric)1 SparkCombineFn (org.apache.beam.runners.spark.translation.SparkCombineFn)1 PipelineResult (org.apache.beam.sdk.PipelineResult)1 ApplicationNameOptions (org.apache.beam.sdk.options.ApplicationNameOptions)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)1 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)1 FlatMapFunction (org.apache.spark.api.java.function.FlatMapFunction)1 Function (org.apache.spark.api.java.function.Function)1 SparkListenerApplicationStart (org.apache.spark.scheduler.SparkListenerApplicationStart)1 JavaDStream (org.apache.spark.streaming.api.java.JavaDStream)1 JavaStreamingContext (org.apache.spark.streaming.api.java.JavaStreamingContext)1 Option (scala.Option)1