use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
the class StatefulParDoEvaluatorFactory method createEvaluator.
/**
 * Creates the evaluator that processes one committed bundle for a stateful ParDo.
 *
 * <p>The evaluator obtained from {@code delegateFactory} does the actual work; it is wrapped in a
 * {@code StatefulParDoEvaluator} before being returned.
 *
 * @param application the applied stateful ParDo transform to evaluate
 * @param inputBundle the committed bundle of keyed work items to be processed
 * @return a {@code StatefulParDoEvaluator} wrapping the delegate evaluator
 * @throws Exception if any step of evaluator creation fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<K, KV<K, InputT>>> createEvaluator(
        AppliedPTransform<PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>, PCollectionTuple, StatefulParDo<K, InputT, OutputT>> application,
        CommittedBundle<KeyedWorkItem<K, KV<K, InputT>>> inputBundle) throws Exception {
    StatefulParDo<K, InputT, OutputT> statefulParDo = application.getTransform();
    DoFn<KV<K, InputT>, OutputT> doFn = statefulParDo.getUnderlyingParDo().getFn();
    DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

    // When the DoFn declares state, look up the cleanup registry once for every
    // (transform, key, window) combination present in the bundle. The looked-up value is
    // discarded, so the lookup is presumably done for its side effect; the registry appears to
    // act as a cache that keeps the number of cleanup tasks bounded -- confirm against the
    // definition of cleanupRegistry.
    if (!signature.stateDeclarations().isEmpty()) {
        for (WindowedValue<?> element : inputBundle.getElements()) {
            for (BoundedWindow window : element.getWindows()) {
                StructuralKey<K> key = (StructuralKey<K>) inputBundle.getKey();
                cleanupRegistry.get(AppliedPTransformOutputKeyAndWindow.create(application, key, window));
            }
        }
    }

    // The raw AppliedPTransform cast is what lets the stateful application be handed to the
    // delegate factory; the unchecked/rawtypes suppression on this method covers it.
    DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> wrapped =
            delegateFactory.createEvaluator(
                    (AppliedPTransform) application,
                    inputBundle.getKey(),
                    doFn,
                    statefulParDo.getUnderlyingParDo().getSideInputs(),
                    statefulParDo.getUnderlyingParDo().getMainOutputTag(),
                    statefulParDo.getUnderlyingParDo().getAdditionalOutputTags().getAll());
    return new StatefulParDoEvaluator<>(wrapped);
}
use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
the class ParDoMultiOverrideFactory method getReplacementTransform.
/**
 * Chooses the transform that should replace the given multi-output ParDo.
 *
 * <p>The choice is driven by the signature of the transform's {@code DoFn}:
 * a splittable {@code processElement} yields a {@code SplittableParDo}; a signature with any
 * state or timer declarations yields a {@code GbkThenStatefulParDo}; otherwise the transform
 * is returned unchanged.
 *
 * @param transform the multi-output ParDo to inspect
 * @return the replacement transform, or {@code transform} itself when no replacement applies
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementTransform(
        MultiOutput<InputT, OutputT> transform) {
    DoFn<InputT, OutputT> fn = transform.getFn();
    DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());

    if (signature.processElement().isSplittable()) {
        return new SplittableParDo(transform);
    }

    boolean usesStateOrTimers =
            !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty();
    if (usesStateOrTimers) {
        // DoFnSignatures guarantees that a stateful signature is also keyed, which is what
        // justifies viewing the transform as operating on KV elements.
        MultiOutput<KV<?, ?>, OutputT> keyed = (MultiOutput<KV<?, ?>, OutputT>) transform;
        return new GbkThenStatefulParDo(keyed);
    }

    return transform;
}
use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
the class TransformTranslator method parDo.
/**
 * Builds the evaluator that translates a {@code ParDo.MultiOutput} into RDD operations.
 *
 * <p>The evaluator runs the transform's {@code DoFn} over the input RDD through a
 * {@code MultiDoFnFunction}, producing a pair RDD keyed by output tag, and then registers one
 * {@code BoundedDataset} per declared output by filtering that pair RDD on the output's tag.
 * A {@code DoFn} whose signature declares state or timers is routed through
 * {@code statefulParDoTransform}; any other {@code DoFn} is applied with
 * {@code mapPartitionsToPair}.
 *
 * @param <InputT> element type consumed by the ParDo
 * @param <OutputT> main output element type of the ParDo
 * @return the evaluator for multi-output ParDo transforms
 */
private static <InputT, OutputT> TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>> parDo() {
    return new TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>>() {
        @Override
        @SuppressWarnings("unchecked")
        public void evaluate(ParDo.MultiOutput<InputT, OutputT> transform, EvaluationContext context) {
            String stepName = context.getCurrentTransform().getFullName();
            DoFn<InputT, OutputT> doFn = transform.getFn();
            rejectSplittable(doFn);

            JavaRDD<WindowedValue<InputT>> inputRdd =
                    ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
            WindowingStrategy<?, ?> windowing = context.getInput(transform).getWindowingStrategy();
            Accumulator<NamedAggregators> aggregators = AggregatorsAccumulator.getInstance();
            Accumulator<MetricsContainerStepMap> metrics = MetricsAccumulator.getInstance();

            DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
            boolean stateful =
                    !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty();

            MultiDoFnFunction<InputT, OutputT> function =
                    new MultiDoFnFunction<>(
                            aggregators,
                            metrics,
                            stepName,
                            doFn,
                            context.getRuntimeContext(),
                            transform.getMainOutputTag(),
                            transform.getAdditionalOutputTags().getAll(),
                            TranslationUtils.getSideInputs(transform.getSideInputs(), context),
                            windowing,
                            stateful);

            JavaPairRDD<TupleTag<?>, WindowedValue<?>> tagged;
            if (!stateful) {
                tagged = inputRdd.mapPartitionsToPair(function);
            } else {
                // DoFnSignatures guarantees that a stateful signature is also keyed, which is
                // why the input coder can be treated as a KvCoder here.
                tagged =
                        statefulParDoTransform(
                                (KvCoder) context.getInput(transform).getCoder(),
                                windowing.getWindowFn().windowCoder(),
                                (JavaRDD) inputRdd,
                                (MultiDoFnFunction) function);
            }

            Map<TupleTag<?>, PValue> outputs = context.getOutputs(transform);
            if (outputs.size() > 1) {
                // The pair RDD is filtered once per output below, so cache it when there is
                // more than one output.
                tagged.cache();
            }

            for (Map.Entry<TupleTag<?>, PValue> entry : outputs.entrySet()) {
                TupleTag<?> tag = entry.getKey();
                JavaPairRDD<TupleTag<?>, WindowedValue<?>> forTag =
                        tagged.filter(new TranslationUtils.TupleTagFilter(tag));
                // Element types differ between outputs, so Object is the most specific type
                // available at this point.
                JavaRDD<WindowedValue<Object>> values =
                        (JavaRDD<WindowedValue<Object>>) (JavaRDD<?>) forTag.values();
                context.putDataset(entry.getValue(), new BoundedDataset<>(values));
            }
        }

        @Override
        public String toNativeString() {
            return "mapPartitions(new <fn>())";
        }
    };
}
Aggregations