Example 1 with SparkKeyedCombineFn

Use of org.apache.beam.runners.spark.translation.SparkKeyedCombineFn in the Apache Beam project.

From the class StreamingTransformTranslator, the method combineGrouped:

private static <K, InputT, OutputT> TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>> combineGrouped() {
    return new TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>>() {

        @Override
        public void evaluate(final Combine.GroupedValues<K, InputT, OutputT> transform, EvaluationContext context) {
            // get the applied combine function.
            PCollection<? extends KV<K, ? extends Iterable<InputT>>> input = context.getInput(transform);
            final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
            @SuppressWarnings("unchecked") final CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> fn = (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn());
            @SuppressWarnings("unchecked") UnboundedDataset<KV<K, Iterable<InputT>>> unboundedDataset = ((UnboundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform));
            JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> dStream = unboundedDataset.getDStream();
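            // Capture the runtime context and side-input views for use inside the Spark closures.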
            final SparkRuntimeContext runtimeContext = context.getRuntimeContext();
            final SparkPCollectionView pviews = context.getPViews();
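            // For each micro-batch RDD, apply the combine function to every grouped value.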
            JavaDStream<WindowedValue<KV<K, OutputT>>> outStream = dStream.transform(
                new Function<JavaRDD<WindowedValue<KV<K, Iterable<InputT>>>>,
                        JavaRDD<WindowedValue<KV<K, OutputT>>>>() {

                @Override
                public JavaRDD<WindowedValue<KV<K, OutputT>>> call(JavaRDD<WindowedValue<KV<K, Iterable<InputT>>>> rdd) throws Exception {
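                    // Build the keyed combine wrapper, resolving side inputs against this batch's SparkContext.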
                    SparkKeyedCombineFn<K, InputT, ?, OutputT> combineFnWithContext =
                        new SparkKeyedCombineFn<>(
                            fn,
                            runtimeContext,
                            TranslationUtils.getSideInputs(
                                transform.getSideInputs(), new JavaSparkContext(rdd.context()), pviews),
                            windowingStrategy);
                    return rdd.map(new TranslationUtils.CombineGroupedValues<>(combineFnWithContext));
                }
            });
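            // Register the combined output as a new unbounded dataset, preserving the original stream sources.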
            context.putDataset(transform, new UnboundedDataset<>(outStream, unboundedDataset.getStreamSources()));
        }

        @Override
        public String toNativeString() {
            return "map(new <fn>())";
        }
    };
}
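
For orientation, below is a minimal, hypothetical user-side pipeline fragment (not part of the runner) showing the kind of Combine.GroupedValues application this evaluator translates; perKeyInts stands for an assumed PCollection<KV<String, Integer>>, and Sum.ofIntegers() is just one example CombineFn.

// Hypothetical pipeline fragment: Combine.groupedValues(...) applied after a
// GroupByKey produces the Combine.GroupedValues transform evaluated above.
PCollection<KV<String, Iterable<Integer>>> grouped =
    perKeyInts.apply(GroupByKey.<String, Integer>create());
PCollection<KV<String, Integer>> sums =
    grouped.apply(Combine.<String, Integer, Integer>groupedValues(Sum.ofIntegers()));

In the streaming translator, such a composite reaches combineGrouped() above, which applies the combine function per key and window on each micro-batch.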
Also used:
Combine (org.apache.beam.sdk.transforms.Combine)
SparkKeyedCombineFn (org.apache.beam.runners.spark.translation.SparkKeyedCombineFn)
WindowedValue (org.apache.beam.sdk.util.WindowedValue)
SparkRuntimeContext (org.apache.beam.runners.spark.translation.SparkRuntimeContext)
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)
CombineWithContext (org.apache.beam.sdk.transforms.CombineWithContext)
KV (org.apache.beam.sdk.values.KV)
TransformEvaluator (org.apache.beam.runners.spark.translation.TransformEvaluator)
JavaRDD (org.apache.spark.api.java.JavaRDD)
EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext)
SparkPCollectionView (org.apache.beam.runners.spark.translation.SparkPCollectionView)
