Search in sources :

Example 51 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class DoFnOperatorTest method testSideInputs.

/**
 * Pushes side-input and main-input records through a {@link DoFnOperator} wrapping an
 * {@code IdentityDoFn} and expects exactly the two main-input elements on the output, in order.
 *
 * @param keyed when {@code true} the operator receives a string key coder and is driven by the
 *     keyed two-input harness; otherwise the key coder is {@code null} and the plain harness is used
 * @throws Exception if the harness or the operator fails
 */
public void testSideInputs(boolean keyed) throws Exception {
    WindowedValue.ValueOnlyWindowedValueCoder<String> valueCoder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
    TupleTag<String> mainOutput = new TupleTag<>("main-output");
    // Union tag 1 carries view1, union tag 2 carries view2.
    ImmutableMap<Integer, PCollectionView<?>> viewsByUnionTag = ImmutableMap.<Integer, PCollectionView<?>>builder().put(1, view1).put(2, view2).build();
    Coder<String> keyCoder = keyed ? StringUtf8Coder.of() : null;

    DoFnOperator<String, String, String> operator = new DoFnOperator<>(
            new IdentityDoFn<String>(),
            "stepName",
            valueCoder,
            mainOutput,
            Collections.<TupleTag<?>>emptyList(),
            new DoFnOperator.DefaultOutputManagerFactory<String>(),
            WindowingStrategy.globalDefault(),
            viewsByUnionTag, /* side-input mapping */
            ImmutableList.<PCollectionView<?>>of(view1, view2), /* side inputs */
            PipelineOptionsFactory.as(FlinkPipelineOptions.class),
            keyCoder);

    TwoInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue, String> harness = new TwoInputStreamOperatorTestHarness<>(operator);
    if (keyed) {
        // The second input is treated as broadcast, so it only needs a dummy key.
        harness = new KeyedTwoInputStreamOperatorTestHarness<>(operator, new StringKeySelector(), new DummyKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
    }
    harness.open();

    Instant early = new Instant(0);
    Instant late = new Instant(1000);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(100));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(0), new Instant(500));
    ImmutableList<String> firstViewValues = ImmutableList.of("hello", "ciao");
    ImmutableList<String> secondViewValues = ImmutableList.of("foo", "bar");

    // Side-input records that arrive before any main-input element.
    harness.processElement2(new StreamRecord<>(new RawUnionValue(1, valuesInWindow(firstViewValues, early, firstWindow))));
    harness.processElement2(new StreamRecord<>(new RawUnionValue(2, valuesInWindow(secondViewValues, early, secondWindow))));

    // Two regular main-input elements.
    WindowedValue<String> helloElement = valueInWindow("Hello", early, firstWindow);
    WindowedValue<String> worldElement = valueInWindow("World", late, firstWindow);
    harness.processElement1(new StreamRecord<>(helloElement));
    harness.processElement1(new StreamRecord<>(worldElement));

    // Side-input records that arrive after the main-input elements.
    harness.processElement2(new StreamRecord<>(new RawUnionValue(1, valuesInWindow(firstViewValues, late, firstWindow))));
    harness.processElement2(new StreamRecord<>(new RawUnionValue(2, valuesInWindow(secondViewValues, late, secondWindow))));

    assertThat(this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()), contains(helloElement, worldElement));
    harness.close();
}
Also used : TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 52 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ProcessBundleHandler method createDoFnRunner.

/**
 * Builds a {@link DoFnRunner} from the serialized {@code DoFnInfo} carried inside a
 * {@link org.apache.beam.fn.v1.BeamFnApi.FunctionSpec}.
 *
 * <p>The keys of {@code outputMap}, parsed as longs, are checked against the ids of the
 * deserialized {@code DoFnInfo}'s output map. Each output tag is then wired to the consumers
 * registered under the string form of its id.
 *
 * @param functionSpec spec whose data unpacks to a {@code BytesValue} holding the serialized
 *     {@code DoFnInfo}
 * @param outputMap consumers keyed by the string form of the output id
 * @return the runner produced by {@code DoFnRunners.simpleRunner} for the deserialized DoFn
 * @throws IllegalArgumentException if the spec data cannot be unpacked
 */
private <InputT, OutputT> DoFnRunner<InputT, OutputT> createDoFnRunner(BeamFnApi.FunctionSpec functionSpec, Map<String, Collection<ThrowingConsumer<WindowedValue<OutputT>>>> outputMap) {
    ByteString payload;
    try {
        payload = functionSpec.getData().unpack(BytesValue.class).getValue();
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException(String.format("Unable to unwrap DoFn %s", functionSpec), e);
    }
    DoFnInfo<?, ?> doFnInfo = (DoFnInfo<?, ?>) SerializableUtils.deserializeFromByteArray(payload.toByteArray(), "DoFnInfo");

    // Output ids known to the DoFn, mapped to their tags.
    Map<Long, TupleTag<?>> tagsById = doFnInfo.getOutputMap();
    boolean idsMatch = Objects.equals(new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)), tagsById.keySet());
    checkArgument(idsMatch, "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.", outputMap.keySet(), tagsById);

    // Re-key the consumers from output id to output tag.
    ImmutableMultimap.Builder<TupleTag<?>, ThrowingConsumer<WindowedValue<OutputT>>> consumersByTag = ImmutableMultimap.builder();
    tagsById.forEach((id, tag) -> consumersByTag.putAll(tag, outputMap.get(Long.toString(id))));
    @SuppressWarnings({ "unchecked", "rawtypes" }) final Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> consumersByOutputTag = (Map) consumersByTag.build().asMap();

    OutputManager outputManager = new OutputManager() {

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
            try {
                Collection<ThrowingConsumer<WindowedValue<?>>> targets = consumersByOutputTag.get(tag);
                // A missing entry is a normal case, e.g. a DoFn output that nothing consumes;
                // such output is simply dropped.
                if (targets != null) {
                    for (ThrowingConsumer<WindowedValue<?>> target : targets) {
                        target.accept(output);
                    }
                }
            } catch (Throwable t) {
                throw new RuntimeException(t);
            }
        }
    };

    @SuppressWarnings({ "unchecked", "rawtypes", "deprecation" }) DoFnRunner<InputT, OutputT> runner = DoFnRunners.simpleRunner(
            PipelineOptionsFactory.create(), /* TODO */
            (DoFn) doFnInfo.getDoFn(),
            NullSideInputReader.empty(), /* TODO */
            outputManager,
            (TupleTag) tagsById.get(doFnInfo.getMainOutput()),
            new ArrayList<>(tagsById.values()),
            new FakeStepContext(),
            (WindowingStrategy) doFnInfo.getWindowingStrategy());
    return runner;
}
Also used : ByteString(com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) FakeStepContext(org.apache.beam.fn.harness.fake.FakeStepContext) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HashSet(java.util.HashSet) DoFnInfo(org.apache.beam.runners.dataflow.util.DoFnInfo) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) Collection(java.util.Collection) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 53 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class TransformHierarchyTest method replaceWithCompositeSucceeds.

/**
 * Replaces a single-output ParDo node in the hierarchy with a composite transform that wraps a
 * multi-output ParDo, then checks that both the composite node and the ParDo node inside it
 * report the original output value after {@code replaceOutputs}.
 */
@Test
public void replaceWithCompositeSucceeds() {
    // The transform to be replaced: a single-output ParDo emitting element + 1.
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Register the "Upstream" node with `upstream` as its output.
    hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Register the "Original" node with `output` as its output; this is the node replaced below.
    TransformHierarchy.Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    // The replacement ParDo runs the same element + 1 logic but is multi-output, with `longs` as
    // its main output tag and no additional output tags.
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite wrapper that applies the multi-output ParDo and returns only its `longs` output.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Swap the original node for the composite, then register the contained ParDo under it.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    // NOTE(review): presumably this sets the output of the composite node, which replaceNode
    // appears to leave as the current node — confirm against TransformHierarchy.
    hierarchy.setOutput(replacementOutput.get(longs));
    // Map the replacement's single expanded output back onto the original `output` value.
    Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet());
    hierarchy.replaceOutputs(Collections.<PValue, ReplacementOutput>singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    // The contained ParDo keeps the replacement tag as its key but now maps it to the original value.
    assertThat(replacementParNode.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(replacementLongs.getKey()));
    assertThat(replacementParNode.getOutputs().values(), Matchers.<PValue>contains(output));
    // The composite's output keys equal those of the expanded `longs` collection; its value is the original.
    assertThat(compositeNode.getOutputs().keySet(), equalTo(replacementOutput.get(longs).expand().keySet()));
    assertThat(compositeNode.getOutputs().values(), Matchers.<PValue>contains(output));
    // NOTE(review): presumably pops the composite node opened by replaceNode — confirm.
    hierarchy.popNode();
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 54 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Creates the evaluator for one {@code ProcessElements} application over the given input bundle.
 *
 * <p>A new {@code ProcessFn} is obtained from the transform and handed to a
 * {@link DoFnLifecycleManager}. The delegate factory builds the underlying ParDo evaluator, and
 * the fn is then given state internals, timer internals and a process-element invoker before
 * the evaluator is wrapped so that the lifecycle manager is released with it.
 *
 * @param application the applied {@code ProcessElements} transform
 * @param inputBundle the bundle whose key selects the execution context
 * @return the ParDo evaluator wrapped by {@code DoFnLifecycleManagerRemovingTransformEvaluator}
 * @throws Exception if any of the collaborators fails while the evaluator is assembled
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> processElements = application.getTransform();

    // The lifecycle manager owns the fn; the instance configured below is the one it hands back.
    DoFnLifecycleManager lifecycle = DoFnLifecycleManager.of(processElements.newProcessFn(processElements.getFn()));
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> managedFn = ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) lifecycle.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());

    String step = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext context = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(step);

    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> delegate = delegateFactory.createParDoEvaluator(
            application,
            inputBundle.getKey(),
            processElements.getSideInputs(),
            processElements.getMainOutputTag(),
            processElements.getAdditionalOutputTags().getAll(),
            context,
            managedFn,
            lifecycle);

    // State and timers always come from this step's context, whatever key is requested.
    managedFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) context.stateInternals();
        }
    });
    managedFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return context.timerInternals();
        }
    });

    // Everything the fn emits is routed through the delegate evaluator's output manager.
    final OutputManager sink = delegate.getOutputManager();
    OutputWindowedValue<OutputT> windowedOutput = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            sink.output(processElements.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            sink.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // The invoker gets a dedicated single-thread scheduled executor backed by a named daemon thread.
    // NOTE(review): 10000 and ten seconds are presumably the output-count and time bounds the
    // invoker's name refers to — confirm against its constructor.
    managedFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
            processElements.getFn(),
            evaluationContext.getPipelineOptions(),
            windowedOutput,
            evaluationContext.createSideInputReader(processElements.getSideInputs()),
            Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()),
            10000,
            Duration.standardSeconds(10)));

    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(delegate, lifecycle);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 55 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class WatermarkManager method getInputProcessingWatermarks.

/**
 * Gathers the synchronized-processing output watermark of every input of {@code transform}.
 *
 * @param transform the applied transform whose inputs are inspected
 * @return one watermark per input value, in the iteration order of the transform's inputs; a
 *     transform without inputs yields the single entry {@code THE_END_OF_TIME}
 */
private Collection<Watermark> getInputProcessingWatermarks(AppliedPTransform<?, ?, ?> transform) {
    Map<TupleTag<?>, PValue> inputs = transform.getInputs();
    if (inputs.isEmpty()) {
        // Nothing upstream to wait for.
        return ImmutableList.<Watermark>of(THE_END_OF_TIME);
    }
    ImmutableList.Builder<Watermark> watermarks = ImmutableList.builder();
    for (PValue input : inputs.values()) {
        watermarks.add(getValueWatermark(input).synchronizedProcessingOutputWatermark);
    }
    return watermarks.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) TupleTag(org.apache.beam.sdk.values.TupleTag) PValue(org.apache.beam.sdk.values.PValue)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag): 67, Test (org.junit.Test): 44, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 27, KV (org.apache.beam.sdk.values.KV): 16, PCollection (org.apache.beam.sdk.values.PCollection): 15, Instant (org.joda.time.Instant): 14, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 13, PValue (org.apache.beam.sdk.values.PValue): 13, Category (org.junit.experimental.categories.Category): 13, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 10, AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform): 9, DoFn (org.apache.beam.sdk.transforms.DoFn): 9, StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 9, PCollectionView (org.apache.beam.sdk.values.PCollectionView): 9, Matchers.containsString (org.hamcrest.Matchers.containsString): 9, Map (java.util.Map): 8, ImmutableMap (com.google.common.collect.ImmutableMap): 6, Pipeline (org.apache.beam.sdk.Pipeline): 5, ImmutableList (com.google.common.collect.ImmutableList): 4, ArrayList (java.util.ArrayList): 4