Search in sources :

Example 6 with CannotProvideCoderException

Use of org.apache.beam.sdk.coders.CannotProvideCoderException in the project beam by apache.

The method combineGlobally of the class TransformTranslator.

/**
 * Builds a {@link TransformEvaluator} that translates a {@link Combine.Globally} transform
 * into Spark RDD operations.
 *
 * <p>The evaluator combines the whole (bounded) input globally per window. For an empty
 * input RDD — which Spark would otherwise skip entirely — it either emits the combine fn's
 * default value (when {@code isInsertDefault()} is set) or an empty RDD.
 *
 * @param <InputT>  element type of the input {@link PCollection}
 * @param <AccumT>  accumulator type of the combine fn
 * @param <OutputT> element type of the output {@link PCollection}
 */
private static <InputT, AccumT, OutputT> TransformEvaluator<Combine.Globally<InputT, OutputT>> combineGlobally() {
    return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() {

        @Override
        public void evaluate(Combine.Globally<InputT, OutputT> transform, EvaluationContext context) {
            final PCollection<InputT> input = context.getInput(transform);
            // Reuse the PCollection resolved above rather than looking it up a second time.
            final Coder<InputT> iCoder = input.getCoder();
            final Coder<OutputT> oCoder = context.getOutput(transform).getCoder();
            final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
            // Normalize the user's CombineFn to the with-context form so side inputs can be fed in.
            @SuppressWarnings("unchecked") final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn = (CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn());
            // Coder for windowed output values, used to round-trip results through byte arrays.
            final WindowedValue.FullWindowedValueCoder<OutputT> wvoCoder = WindowedValue.FullWindowedValueCoder.of(oCoder, windowingStrategy.getWindowFn().windowCoder());
            final SparkRuntimeContext runtimeContext = context.getRuntimeContext();
            final boolean hasDefault = transform.isInsertDefault();
            final SparkGlobalCombineFn<InputT, AccumT, OutputT> sparkCombineFn = new SparkGlobalCombineFn<>(combineFn, runtimeContext, TranslationUtils.getSideInputs(transform.getSideInputs(), context), windowingStrategy);
            final Coder<AccumT> aCoder;
            try {
                aCoder = combineFn.getAccumulatorCoder(runtimeContext.getCoderRegistry(), iCoder);
            } catch (CannotProvideCoderException e) {
                // No coder can be inferred for the accumulator type: this pipeline cannot run.
                throw new IllegalStateException("Could not determine coder for accumulator", e);
            }
            @SuppressWarnings("unchecked") JavaRDD<WindowedValue<InputT>> inRdd = ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
            JavaRDD<WindowedValue<OutputT>> outRdd;
            // Absent iff the input RDD was empty (Spark skips execution on empty RDDs).
            Optional<Iterable<WindowedValue<AccumT>>> maybeAccumulated = GroupCombineFunctions.combineGlobally(inRdd, sparkCombineFn, iCoder, aCoder, windowingStrategy);
            if (maybeAccumulated.isPresent()) {
                Iterable<WindowedValue<OutputT>> output = sparkCombineFn.extractOutput(maybeAccumulated.get());
                // Serialize through the windowed-value coder so the parallelized data is
                // distributed in an encodable form, then decode back into an RDD.
                outRdd = context.getSparkContext().parallelize(CoderHelpers.toByteArrays(output, wvoCoder)).map(CoderHelpers.fromByteFunction(wvoCoder));
            } else {
                // handle empty input RDD, which will naturally skip the entire execution
                // as Spark will not run on empty RDDs.
                // Use the context's existing JavaSparkContext (as done above) instead of
                // wrapping inRdd.context() in a fresh JavaSparkContext instance.
                JavaSparkContext jsc = context.getSparkContext();
                if (hasDefault) {
                    OutputT defaultValue = combineFn.defaultValue();
                    outRdd = jsc.parallelize(Lists.newArrayList(CoderHelpers.toByteArray(defaultValue, oCoder))).map(CoderHelpers.fromByteFunction(oCoder)).map(WindowingHelpers.<OutputT>windowFunction());
                } else {
                    outRdd = jsc.emptyRDD();
                }
            }
            context.putDataset(transform, new BoundedDataset<>(outRdd));
        }

        @Override
        public String toNativeString() {
            return "aggregate(..., new <fn>(), ...)";
        }
    };
}
Also used : FluentIterable(com.google.common.collect.FluentIterable) Combine(org.apache.beam.sdk.transforms.Combine) WindowedValue(org.apache.beam.sdk.util.WindowedValue) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) CombineWithContext(org.apache.beam.sdk.transforms.CombineWithContext) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException)

Aggregations

CannotProvideCoderException (org.apache.beam.sdk.coders.CannotProvideCoderException)6 ParameterizedType (java.lang.reflect.ParameterizedType)2 Type (java.lang.reflect.Type)2 Coder (org.apache.beam.sdk.coders.Coder)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 FluentIterable (com.google.common.collect.FluentIterable)1 CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry)1 CollectionCoder (org.apache.beam.sdk.coders.CollectionCoder)1 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)1 KvCoder (org.apache.beam.sdk.coders.KvCoder)1 ListCoder (org.apache.beam.sdk.coders.ListCoder)1 MapCoder (org.apache.beam.sdk.coders.MapCoder)1 SetCoder (org.apache.beam.sdk.coders.SetCoder)1 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)1 Combine (org.apache.beam.sdk.transforms.Combine)1 CombineWithContext (org.apache.beam.sdk.transforms.CombineWithContext)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 TimestampedValueCoder (org.apache.beam.sdk.values.TimestampedValue.TimestampedValueCoder)1 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)1