Search in sources :

Example 11 with DoFnSignature

Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

In class StatefulParDoEvaluatorFactory, method createEvaluator.

/**
 * Builds the evaluator for one input bundle of a stateful ParDo application.
 *
 * <p>If the underlying {@code DoFn} declares any state, the cleanup registry is queried once for
 * every (application, bundle key, window) combination seen in the bundle's elements before the
 * evaluator is built. The evaluator itself comes from {@code delegateFactory} and is wrapped in a
 * {@code StatefulParDoEvaluator}.
 *
 * @param application the applied stateful ParDo to evaluate
 * @param inputBundle the committed bundle supplying the key and the elements
 * @return a {@code StatefulParDoEvaluator} wrapping the delegate evaluator
 * @throws Exception if any of the calls made while building the evaluator fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<K, KV<K, InputT>>> createEvaluator(
        AppliedPTransform<PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>, PCollectionTuple, StatefulParDo<K, InputT, OutputT>> application,
        CommittedBundle<KeyedWorkItem<K, KV<K, InputT>>> inputBundle) throws Exception {
    final DoFn<KV<K, InputT>, OutputT> fn = application.getTransform().getUnderlyingParDo().getFn();
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());

    if (!fnSignature.stateDeclarations().isEmpty()) {
        // Only the lookup matters here; its result is discarded.
        // NOTE(review): cleanupRegistry appears to be a cache that limits how many cleanup
        // tasks get created for the same (transform, key, window) - confirm against its definition.
        for (final WindowedValue<?> value : inputBundle.getElements()) {
            for (final BoundedWindow window : value.getWindows()) {
                final StructuralKey<K> bundleKey = (StructuralKey<K>) inputBundle.getKey();
                cleanupRegistry.get(
                        AppliedPTransformOutputKeyAndWindow.create(application, bundleKey, window));
            }
        }
    }

    // The raw AppliedPTransform cast is what the "rawtypes" suppression above covers.
    DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> delegate =
            delegateFactory.createEvaluator(
                    (AppliedPTransform) application,
                    inputBundle.getKey(),
                    fn,
                    application.getTransform().getUnderlyingParDo().getSideInputs(),
                    application.getTransform().getUnderlyingParDo().getMainOutputTag(),
                    application.getTransform().getUnderlyingParDo().getAdditionalOutputTags().getAll());
    return new StatefulParDoEvaluator<>(delegate);
}
Also used : BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) KV(org.apache.beam.sdk.values.KV) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 12 with DoFnSignature

Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

In class ParDoMultiOverrideFactory, method getReplacementTransform.

/**
 * Chooses the transform that should stand in for the given multi-output ParDo.
 *
 * <ul>
 *   <li>A splittable {@code processElement} yields a {@code SplittableParDo}.
 *   <li>A {@code DoFn} declaring state or timers yields a {@code GbkThenStatefulParDo}.
 *   <li>Anything else is returned unchanged.
 * </ul>
 *
 * @param transform the original multi-output ParDo
 * @return the replacement transform, or {@code transform} itself when no replacement applies
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementTransform(MultiOutput<InputT, OutputT> transform) {
    final DoFnSignature fnSignature = DoFnSignatures.getSignature(transform.getFn().getClass());

    if (fnSignature.processElement().isSplittable()) {
        return new SplittableParDo(transform);
    }

    final boolean usesStateOrTimers =
            !fnSignature.stateDeclarations().isEmpty() || !fnSignature.timerDeclarations().isEmpty();
    if (!usesStateOrTimers) {
        return transform;
    }

    // A stateful signature is also keyed (DoFnSignatures ensures this), so viewing the
    // input type as KV is what makes this unchecked cast acceptable.
    final MultiOutput<KV<?, ?>, OutputT> keyed = (MultiOutput<KV<?, ?>, OutputT>) transform;
    return new GbkThenStatefulParDo(keyed);
}
Also used : SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) KV(org.apache.beam.sdk.values.KV) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput)

Example 13 with DoFnSignature

Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.

In class TransformTranslator, method parDo.

/**
 * Creates the evaluator that translates a {@code ParDo.MultiOutput} into RDD operations.
 *
 * <p>The evaluator rejects splittable DoFns, runs the DoFn over the input RDD (through
 * {@code statefulParDoTransform} when the DoFn declares state or timers, otherwise through
 * {@code mapPartitionsToPair}), and then registers one {@code BoundedDataset} per output tag by
 * filtering the combined tagged RDD.
 *
 * @param <InputT> element type consumed by the DoFn
 * @param <OutputT> main output element type produced by the DoFn
 * @return a new evaluator for multi-output ParDo transforms
 */
private static <InputT, OutputT> TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>> parDo() {
    return new TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>>() {

        @Override
        @SuppressWarnings("unchecked")
        public void evaluate(ParDo.MultiOutput<InputT, OutputT> transform, EvaluationContext context) {
            final String fullStepName = context.getCurrentTransform().getFullName();
            final DoFn<InputT, OutputT> fn = transform.getFn();
            rejectSplittable(fn);

            final BoundedDataset<InputT> inputDataset =
                    (BoundedDataset<InputT>) context.borrowDataset(transform);
            final JavaRDD<WindowedValue<InputT>> inputRdd = inputDataset.getRDD();
            final WindowingStrategy<?, ?> strategy =
                    context.getInput(transform).getWindowingStrategy();
            final Accumulator<NamedAggregators> aggregators = AggregatorsAccumulator.getInstance();
            final Accumulator<MetricsContainerStepMap> metrics = MetricsAccumulator.getInstance();

            // State or timer declarations on the DoFn select the stateful translation path.
            final DoFnSignature fnSignature =
                    DoFnSignatures.getSignature(transform.getFn().getClass());
            final boolean isStateful =
                    !fnSignature.stateDeclarations().isEmpty()
                            || !fnSignature.timerDeclarations().isEmpty();

            final MultiDoFnFunction<InputT, OutputT> perPartitionFn =
                    new MultiDoFnFunction<>(
                            aggregators,
                            metrics,
                            fullStepName,
                            fn,
                            context.getRuntimeContext(),
                            transform.getMainOutputTag(),
                            transform.getAdditionalOutputTags().getAll(),
                            TranslationUtils.getSideInputs(transform.getSideInputs(), context),
                            strategy,
                            isStateful);

            final JavaPairRDD<TupleTag<?>, WindowedValue<?>> taggedOutputs;
            if (!isStateful) {
                taggedOutputs = inputRdd.mapPartitionsToPair(perPartitionFn);
            } else {
                // A stateful signature is also keyed (DoFnSignatures ensures this), hence the
                // raw KvCoder / JavaRDD / MultiDoFnFunction casts.
                taggedOutputs =
                        statefulParDoTransform(
                                (KvCoder) context.getInput(transform).getCoder(),
                                strategy.getWindowFn().windowCoder(),
                                (JavaRDD) inputRdd,
                                (MultiDoFnFunction) perPartitionFn);
            }

            final Map<TupleTag<?>, PValue> outputsByTag = context.getOutputs(transform);
            if (outputsByTag.size() > 1) {
                // The RDD is filtered once per output below, so cache it when there are several.
                taggedOutputs.cache();
            }

            for (Map.Entry<TupleTag<?>, PValue> entry : outputsByTag.entrySet()) {
                final JavaPairRDD<TupleTag<?>, WindowedValue<?>> onlyThisTag =
                        taggedOutputs.filter(new TranslationUtils.TupleTagFilter(entry.getKey()));
                // Outputs under different tags may carry different element types, so Object
                // is the most specific element type available here.
                final JavaRDD<WindowedValue<Object>> untypedValues =
                        (JavaRDD<WindowedValue<Object>>) (JavaRDD<?>) onlyThisTag.values();
                context.putDataset(entry.getValue(), new BoundedDataset<>(untypedValues));
            }
        }

        @Override
        public String toNativeString() {
            return "mapPartitions(new <fn>())";
        }
    };
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowedValue(org.apache.beam.sdk.util.WindowedValue) NamedAggregators(org.apache.beam.runners.spark.aggregators.NamedAggregators) KvCoder(org.apache.beam.sdk.coders.KvCoder) PValue(org.apache.beam.sdk.values.PValue) JavaRDD(org.apache.spark.api.java.JavaRDD) ParDo(org.apache.beam.sdk.transforms.ParDo) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Aggregations

DoFnSignature (org.apache.beam.sdk.transforms.reflect.DoFnSignature)13 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)4 PTransformMatcher (org.apache.beam.sdk.runners.PTransformMatcher)4 Map (java.util.Map)2 ParDo (org.apache.beam.sdk.transforms.ParDo)2 KV (org.apache.beam.sdk.values.KV)2 PValue (org.apache.beam.sdk.values.PValue)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 OutputPort (com.datatorrent.api.Operator.OutputPort)1 ByteString (com.google.protobuf.ByteString)1 ApexParDoOperator (org.apache.beam.runners.apex.translation.operators.ApexParDoOperator)1 SplittableParDo (org.apache.beam.runners.core.construction.SplittableParDo)1 MetricsContainerStepMap (org.apache.beam.runners.core.metrics.MetricsContainerStepMap)1 NamedAggregators (org.apache.beam.runners.spark.aggregators.NamedAggregators)1 KvCoder (org.apache.beam.sdk.coders.KvCoder)1 RunnerApi (org.apache.beam.sdk.common.runner.v1.RunnerApi)1 ParDoPayload (org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload)1 StateSpec (org.apache.beam.sdk.state.StateSpec)1 DoFn (org.apache.beam.sdk.transforms.DoFn)1 MultiOutput (org.apache.beam.sdk.transforms.ParDo.MultiOutput)1