Search in sources :

Example 21 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

the class ParDoTest method testMultiOutputParDoWithSideInputs.

/**
 * Applies a multi-output ParDo with three singleton side inputs attached, of which only two
 * are handed to the {@code TestDoFn}, and checks the main output against the expected values
 * for side inputs 11 and 222.
 */
@Test
@Category(ValidatesRunner.class)
public void testMultiOutputParDoWithSideInputs() {
    List<Integer> elements = Arrays.asList(3, -42, 666);
    final TupleTag<String> mainTag = new TupleTag<String>("main") {
    };
    final TupleTag<Void> extraTag = new TupleTag<Void>("output") {
    };

    // Three singleton views; the "unread" one is attached to the ParDo but not given to the fn.
    PCollectionView<Integer> firstView = pipeline
            .apply("CreateSideInput1", Create.of(11))
            .apply("ViewSideInput1", View.<Integer>asSingleton());
    PCollectionView<Integer> unreadView = pipeline
            .apply("CreateSideInputUnread", Create.of(-3333))
            .apply("ViewSideInputUnread", View.<Integer>asSingleton());
    PCollectionView<Integer> secondView = pipeline
            .apply("CreateSideInput2", Create.of(222))
            .apply("ViewSideInput2", View.<Integer>asSingleton());

    PCollectionTuple results = pipeline
            .apply(Create.of(elements))
            .apply(ParDo.of(new TestDoFn(
                            Arrays.asList(firstView, secondView),
                            Arrays.<TupleTag<String>>asList()))
                    .withSideInputs(firstView)
                    .withSideInputs(unreadView)
                    .withSideInputs(secondView)
                    .withOutputTags(mainTag, TupleTagList.of(extraTag)));

    PAssert.that(results.get(mainTag))
            .satisfies(ParDoTest.HasExpectedOutput.forInput(elements).andSideInputs(11, 222));
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 22 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

the class ReplacementOutputsTest method taggedSucceeds.

/**
 * Checks that {@code ReplacementOutputs.tagged} pairs every original output with the
 * replacement registered under the same tag, even when the replacement tuple lists the tags
 * in a different order than the original.
 */
@Test
public void taggedSucceeds() {
    PCollectionTuple original = PCollectionTuple.of(intsTag, ints)
            .and(strsTag, strs)
            .and(moreIntsTag, moreInts);
    // Same tags as the original, deliberately assembled in a different order.
    PCollectionTuple replacementTuple = PCollectionTuple.of(strsTag, replacementStrs)
            .and(moreIntsTag, moreReplacementInts)
            .and(intsTag, replacementInts);

    Map<PValue, ReplacementOutput> byReplacement =
            ReplacementOutputs.tagged(original.expand(), replacementTuple);

    assertThat(
            byReplacement.keySet(),
            Matchers.<PValue>containsInAnyOrder(replacementStrs, replacementInts, moreReplacementInts));

    ReplacementOutput expectedInts = ReplacementOutput.of(
            TaggedPValue.of(intsTag, ints), TaggedPValue.of(intsTag, replacementInts));
    ReplacementOutput expectedStrs = ReplacementOutput.of(
            TaggedPValue.of(strsTag, strs), TaggedPValue.of(strsTag, replacementStrs));
    ReplacementOutput expectedMoreInts = ReplacementOutput.of(
            TaggedPValue.of(moreIntsTag, moreInts), TaggedPValue.of(moreIntsTag, moreReplacementInts));

    assertThat(byReplacement.get(replacementInts), equalTo(expectedInts));
    assertThat(byReplacement.get(replacementStrs), equalTo(expectedStrs));
    assertThat(byReplacement.get(moreReplacementInts), equalTo(expectedMoreInts));
}
Also used : ReplacementOutput(org.apache.beam.sdk.runners.PTransformOverrideFactory.ReplacementOutput) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PValue(org.apache.beam.sdk.values.PValue) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) Test(org.junit.Test)

Example 23 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

the class ReplacementOutputsTest method taggedMissingReplacementThrows.

/**
 * Expects {@code ReplacementOutputs.tagged} to throw an {@link IllegalArgumentException}
 * whose message names the missing tag and value when the replacement tuple has no entry for
 * {@code intsTag}.
 */
@Test
public void taggedMissingReplacementThrows() {
    PCollectionTuple original = PCollectionTuple.of(intsTag, ints)
            .and(strsTag, strs)
            .and(moreIntsTag, moreInts);

    // Expectations must be registered before the call that is expected to throw.
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Missing replacement");
    thrown.expectMessage(intsTag.toString());
    thrown.expectMessage(ints.toString());

    // The replacement tuple leaves out intsTag on purpose.
    PCollectionTuple incompleteReplacement =
            PCollectionTuple.of(strsTag, replacementStrs).and(moreIntsTag, moreReplacementInts);
    ReplacementOutputs.tagged(original.expand(), incompleteReplacement);
}
Also used : PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Test(org.junit.Test)

Example 24 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

the class TransformHierarchyTest method replaceWithCompositeSucceeds.

/**
 * Replaces a single-output ParDo node in the hierarchy with a composite transform that wraps
 * a multi-output ParDo, then checks that both the composite node and the contained node report
 * the original output {@code PCollection} as their output value.
 */
@Test
public void replaceWithCompositeSucceeds() {
    // Original transform: a single-output ParDo that emits element + 1.
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Register the upstream source in the hierarchy under test.
    hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Register the original ParDo; this is the node that gets replaced below.
    TransformHierarchy.Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    // Replacement: same element + 1 logic, as a multi-output ParDo whose main tag is `longs`
    // and which declares no additional output tags.
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite that applies the multi-output ParDo and exposes only the `longs` output.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Swap the original node for the composite, then register the contained ParDo.
    // NOTE(review): presumably replaceNode leaves the composite as the current node, so the
    // pushNode below nests under it and the final popNode closes it — confirm against
    // TransformHierarchy.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    hierarchy.setOutput(replacementOutput.get(longs));
    // The replacement tuple is expected to expand to exactly one tagged value.
    Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet());
    // Map the replacement's output back onto the original `output` PCollection.
    hierarchy.replaceOutputs(Collections.<PValue, ReplacementOutput>singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    // After replaceOutputs, both nodes should report the original `output` as their value,
    // keyed by the replacement's tag(s).
    assertThat(replacementParNode.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(replacementLongs.getKey()));
    assertThat(replacementParNode.getOutputs().values(), Matchers.<PValue>contains(output));
    assertThat(compositeNode.getOutputs().keySet(), equalTo(replacementOutput.get(longs).expand().keySet()));
    assertThat(compositeNode.getOutputs().values(), Matchers.<PValue>contains(output));
    hierarchy.popNode();
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 25 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

the class ParDoTest method testValueStateTaggedOutput.

/**
 * Runs a stateful ParDo that keeps an integer value state, emits the value read from it (0 when
 * unset) and then writes back that value plus one. Even values go to the main output and odd
 * values to the {@code oddTag} output.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
    final String stateId = "foo";
    final TupleTag<Integer> evenTag = new TupleTag<Integer>() {
    };
    final TupleTag<Integer> oddTag = new TupleTag<Integer>() {
    };

    DoFn<KV<String, Integer>, Integer> countingFn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(stateId) ValueState<Integer> counter) {
            // An unset state is treated as 0.
            Integer seen = MoreObjects.firstNonNull(counter.read(), 0);
            if (seen % 2 != 0) {
                context.output(oddTag, seen);
            } else {
                // Even values go to the main output.
                context.output(seen);
            }
            counter.write(seen + 1);
        }
    };

    // Four elements keyed "hello" and two keyed "goodbye"; the element values are never read.
    PCollection<KV<String, Integer>> keyedInput = pipeline.apply(
            Create.of(
                    KV.of("hello", 42),
                    KV.of("hello", 97),
                    KV.of("hello", 84),
                    KV.of("goodbye", 33),
                    KV.of("hello", 859),
                    KV.of("goodbye", 83945)));
    PCollectionTuple output = keyedInput.apply(
            ParDo.of(countingFn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

    // "hello" contributes 0 and 2; "goodbye" contributes only 0.
    PAssert.that(output.get(evenTag)).containsInAnyOrder(0, 2, 0);
    // "hello" contributes 1 and 3; "goodbye" contributes only 1.
    PAssert.that(output.get(oddTag)).containsInAnyOrder(1, 3, 1);
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)31 TupleTag (org.apache.beam.sdk.values.TupleTag)27 Test (org.junit.Test)26 Category (org.junit.experimental.categories.Category)13 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)8 Matchers.containsString (org.hamcrest.Matchers.containsString)8 KV (org.apache.beam.sdk.values.KV)6 PCollection (org.apache.beam.sdk.values.PCollection)5 PCollectionView (org.apache.beam.sdk.values.PCollectionView)4 PValue (org.apache.beam.sdk.values.PValue)4 Pipeline (org.apache.beam.sdk.Pipeline)3 ValueState (org.apache.beam.sdk.state.ValueState)3 DoFn (org.apache.beam.sdk.transforms.DoFn)3 TupleTagList (org.apache.beam.sdk.values.TupleTagList)3 Instant (org.joda.time.Instant)3 TableRow (com.google.api.services.bigquery.model.TableRow)2 List (java.util.List)2 Map (java.util.Map)2 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)2 StatefulParDo (org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo)2