Usage of com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException in the project spark-dataflow by Cloudera:
the method combineGlobally of the class TransformTranslator.
/**
 * Builds the evaluator that translates a {@code Combine.Globally} transform: the whole
 * input RDD is reduced to a single value with Spark's {@code aggregate()}, moving
 * elements and partial accumulators across the shuffle as coder-encoded byte arrays.
 */
private static <I, A, O> TransformEvaluator<Combine.Globally<I, O>> combineGlobally() {
  return new TransformEvaluator<Combine.Globally<I, O>>() {
    @Override
    public void evaluate(Combine.Globally<I, O> transform, EvaluationContext context) {
      // The user's combine function lives in the transform's private "fn" field;
      // COMBINE_GLOBALLY_FG reads it reflectively.
      final Combine.CombineFn<I, A, O> combineFn = COMBINE_GLOBALLY_FG.get("fn", transform);
      @SuppressWarnings("unchecked")
      JavaRDDLike<WindowedValue<I>, ?> windowedInput =
          (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);
      final Coder<I> inputCoder = context.getInput(transform).getCoder();

      // Resolve the accumulator coder up front: without it partial results cannot be
      // shipped between workers.
      final Coder<A> accumCoder;
      try {
        accumCoder =
            combineFn.getAccumulatorCoder(context.getPipeline().getCoderRegistry(), inputCoder);
      } catch (CannotProvideCoderException e) {
        throw new IllegalStateException("Could not determine coder for accumulator", e);
      }

      // Encode each unwindowed element to bytes so the values can be transferred over
      // the network for the shuffle.
      JavaRDD<byte[]> inputBytes =
          windowedInput
              .map(WindowingHelpers.<I>unwindowFunction())
              .map(CoderHelpers.toByteFunction(inputCoder));

      // Per-partition step: decode the running accumulator and the next element, fold
      // the element in, and re-encode the result.
      Function2<byte[], byte[], byte[]> seqOp =
          new Function2<byte[], byte[], byte[]>() {
            @Override
            public byte[] call(byte[] accumBytes, byte[] elemBytes) throws Exception {
              A accum = CoderHelpers.fromByteArray(accumBytes, accumCoder);
              I elem = CoderHelpers.fromByteArray(elemBytes, inputCoder);
              return CoderHelpers.toByteArray(combineFn.addInput(accum, elem), accumCoder);
            }
          };

      // Cross-partition step: decode two partial accumulators and merge them.
      Function2<byte[], byte[], byte[]> combOp =
          new Function2<byte[], byte[], byte[]>() {
            @Override
            public byte[] call(byte[] leftBytes, byte[] rightBytes) throws Exception {
              A left = CoderHelpers.fromByteArray(leftBytes, accumCoder);
              A right = CoderHelpers.fromByteArray(rightBytes, accumCoder);
              // Not Guava's ImmutableList.of: an accumulator may legitimately be null.
              List<A> partials = Collections.unmodifiableList(Arrays.asList(left, right));
              A merged = combineFn.mergeAccumulators(partials);
              return CoderHelpers.toByteArray(merged, accumCoder);
            }
          };

      byte[] accumulated =
          inputBytes.aggregate(
              CoderHelpers.toByteArray(combineFn.createAccumulator(), accumCoder), seqOp, combOp);

      O output = combineFn.extractOutput(CoderHelpers.fromByteArray(accumulated, accumCoder));
      Coder<O> outputCoder = context.getOutput(transform).getCoder();
      // Re-inject the single result as a one-element RDD. Collections.singleton rather
      // than Guava's ImmutableList.of, because the output itself may be null.
      JavaRDD<byte[]> outputBytes =
          context
              .getSparkContext()
              .parallelize(CoderHelpers.toByteArrays(Collections.singleton(output), outputCoder));
      context.setOutputRDD(
          transform,
          outputBytes
              .map(CoderHelpers.fromByteFunction(outputCoder))
              .map(WindowingHelpers.<O>windowFunction()));
    }
  };
}
Related topic: Aggregations.