Use of com.google.cloud.dataflow.sdk.values.TupleTag in the spark-dataflow project by Cloudera.
Class TransformTranslator, method multiDo.
/**
 * Creates the evaluator for multi-output {@code ParDo.BoundMulti} transforms.
 *
 * <p>The evaluator applies a {@code MultiDoFnFunction} to every partition of the
 * transform's input RDD, producing a single pair RDD keyed by {@code TupleTag}. It then
 * walks the entries of the transform's output {@code PCollectionTuple}, keeps only the
 * pairs matching each entry's tag, and registers the resulting values RDD with the
 * evaluation context for that entry's {@code PCollection}.
 *
 * @param <I> element type of the transform's input
 * @param <O> element type of the transform's main output
 * @return an evaluator for {@code ParDo.BoundMulti<I, O>} transforms
 */
private static <I, O> TransformEvaluator<ParDo.BoundMulti<I, O>> multiDo() {
  return new TransformEvaluator<ParDo.BoundMulti<I, O>>() {
    @Override
    public void evaluate(ParDo.BoundMulti<I, O> transform, EvaluationContext context) {
      // The main output tag is read from the transform through MULTIDO_FG.
      TupleTag<O> mainTag = MULTIDO_FG.get("mainOutputTag", transform);
      MultiDoFnFunction<I, O> taggingFn =
          new MultiDoFnFunction<>(
              transform.getFn(),
              context.getRuntimeContext(),
              mainTag,
              getSideInputs(transform.getSideInputs(), context));

      @SuppressWarnings("unchecked")
      JavaRDDLike<WindowedValue<I>, ?> input =
          (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);

      // Cached because the same pair RDD is filtered once per output tag below.
      JavaPairRDD<TupleTag<?>, WindowedValue<?>> taggedOutputs =
          input.mapPartitionsToPair(taggingFn).cache();

      PCollectionTuple outputs = context.getOutput(transform);
      for (Map.Entry<TupleTag<?>, PCollection<?>> output : outputs.getAll().entrySet()) {
        TupleTag<?> tag = output.getKey();

        @SuppressWarnings("unchecked")
        JavaPairRDD<TupleTag<?>, WindowedValue<?>> matching =
            taggedOutputs.filter(new TupleTagFilter(tag));

        // Each tag's output may have its own element type, so the values are exposed
        // as Object; the double cast goes through a wildcard to satisfy the compiler.
        @SuppressWarnings("unchecked")
        JavaRDD<WindowedValue<Object>> untypedValues =
            (JavaRDD<WindowedValue<Object>>) (JavaRDD<?>) matching.values();

        context.setRDD(output.getValue(), untypedValues);
      }
    }
  };
}
Aggregations