Search in sources :

Example 46 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ParDoTest method testValueStateTaggedOutput.

/**
 * Runs a stateful {@link DoFn} that keeps an integer counter in {@link ValueState} and emits the
 * counter value for every element: even values go to the main output, odd values go to the
 * additional output identified by {@code oddTag}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
    final String stateId = "foo";
    // Anonymous subclasses, as in the other tagged-output tests of this class.
    final TupleTag<Integer> evenTag = new TupleTag<Integer>() {};
    final TupleTag<Integer> oddTag = new TupleTag<Integer>() {};

    DoFn<KV<String, Integer>, Integer> countingFn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
            // An unset state cell reads as null; treat that as a counter of zero.
            Integer seenSoFar = MoreObjects.firstNonNull(state.read(), 0);
            boolean isOdd = seenSoFar % 2 != 0;
            if (isOdd) {
                c.output(oddTag, seenSoFar);
            } else {
                // Even counts are written to the main output (evenTag).
                c.output(seenSoFar);
            }
            state.write(seenSoFar + 1);
        }
    };

    // Four elements keyed "hello" and two keyed "goodbye"; the element values are never read.
    PCollection<KV<String, Integer>> input =
        pipeline.apply(
            Create.of(
                KV.of("hello", 42),
                KV.of("hello", 97),
                KV.of("hello", 84),
                KV.of("goodbye", 33),
                KV.of("hello", 859),
                KV.of("goodbye", 83945)));
    PCollectionTuple output =
        input.apply(ParDo.of(countingFn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

    PCollection<Integer> evens = output.get(evenTag);
    PCollection<Integer> odds = output.get(oddTag);

    // "hello" contributes 0 and 2, "goodbye" contributes only 0.
    PAssert.that(evens).containsInAnyOrder(0, 2, 0);
    // "hello" contributes 1 and 3, "goodbye" contributes only 1.
    PAssert.that(odds).containsInAnyOrder(1, 3, 1);
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 47 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ParDoTest method testParDoWithEmptyTaggedOutput.

/**
 * Applies {@code TestNoOutputDoFn} to an empty input with one main and two additional output
 * tags, and asserts that each of the three resulting collections is empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
    TupleTag<String> mainTag = new TupleTag<String>("main") {};
    TupleTag<String> firstExtraTag = new TupleTag<String>("additional1") {};
    TupleTag<String> secondExtraTag = new TupleTag<String>("additional2") {};
    TupleTagList extraTags = TupleTagList.of(firstExtraTag).and(secondExtraTag);

    PCollectionTuple outputs =
        pipeline
            .apply(Create.empty(VarIntCoder.of()))
            .apply(ParDo.of(new TestNoOutputDoFn()).withOutputTags(mainTag, extraTags));

    // Every declared output must exist and contain no elements.
    PAssert.that(outputs.get(mainTag)).empty();
    PAssert.that(outputs.get(firstExtraTag)).empty();
    PAssert.that(outputs.get(secondExtraTag)).empty();
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 48 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ParDoTest method testMainOutputApplyTaggedOutputNoCoder.

/**
 * Regression test: consuming the main output of a multi-output {@code ParDo} must not fail just
 * because the additional output has no coder yet.
 *
 * <p>The order of the statements below is the point of the test: the main output is given a
 * coder and has a transform applied first, and only afterwards is a coder set on the additional
 * output.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputApplyTaggedOutputNoCoder() {
    // Regression test: applying a transform to the main output
    // should not cause a crash based on lack of a coder for the
    // additional output.
    // NOTE(review): unlike the tagged-output tests that use `new TupleTag<...>(...) {}`, these tags
    // are plain instances; presumably this is deliberate so that no coder can be inferred for
    // TestDummy -- confirm before changing.
    final TupleTag<TestDummy> mainOutputTag = new TupleTag<TestDummy>("main");
    final TupleTag<TestDummy> additionalOutputTag = new TupleTag<TestDummy>("additionalOutput");
    // The DoFn forwards each input element to both the main and the additional output.
    PCollectionTuple tuple = pipeline.apply(Create.of(new TestDummy()).withCoder(TestDummyCoder.of())).apply(ParDo.of(new DoFn<TestDummy, TestDummy>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            TestDummy element = context.element();
            context.output(element);
            context.output(additionalOutputTag, element);
        }
    }).withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
    // Before fix, tuple.get(mainOutputTag).apply(...) would indirectly trigger
    // tuple.get(additionalOutputTag).finishSpecifyingOutput(), which would crash
    // on a missing coder.
    tuple.get(mainOutputTag).setCoder(TestDummyCoder.of()).apply("Output1", ParDo.of(new DoFn<TestDummy, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(1);
        }
    }));
    // The additional output only receives its coder here, after the main output was consumed above.
    tuple.get(additionalOutputTag).setCoder(TestDummyCoder.of());
    pipeline.run();
}
Also used : ProcessElement(org.apache.beam.sdk.transforms.DoFn.ProcessElement) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 49 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ParDoTest method testParDoWithTaggedOutputName.

/**
 * Verifies the names given to the outputs of a named multi-output {@code ParDo}: each output
 * collection is expected to be called {@code "<transform name>.<tag id>"}, including an output
 * whose tag is declared but not passed to the {@code TestDoFn}.
 */
@Test
public void testParDoWithTaggedOutputName() {
    // This test only inspects names and never calls pipeline.run().
    pipeline.enableAbandonedNodeEnforcement(false);

    TupleTag<String> mainTag = new TupleTag<String>("main") {};
    TupleTag<String> firstTag = new TupleTag<String>("output1") {};
    TupleTag<String> secondTag = new TupleTag<String>("output2") {};
    TupleTag<String> thirdTag = new TupleTag<String>("output3") {};
    TupleTag<String> unwrittenTag = new TupleTag<String>("unwrittenOutput") {};

    // The declaration order is intentionally different from the order handed to TestDoFn below.
    TupleTagList declaredTags =
        TupleTagList.of(thirdTag).and(firstTag).and(unwrittenTag).and(secondTag);

    PCollectionTuple outputs =
        pipeline
            .apply(Create.of(Arrays.asList(3, -42, 666)))
            .setName("MyInput")
            .apply(
                "MyParDo",
                ParDo.of(
                        new TestDoFn(
                            Arrays.<PCollectionView<Integer>>asList(),
                            Arrays.asList(firstTag, secondTag, thirdTag)))
                    .withOutputTags(mainTag, declaredTags));

    assertEquals("MyParDo.main", outputs.get(mainTag).getName());
    assertEquals("MyParDo.output1", outputs.get(firstTag).getName());
    assertEquals("MyParDo.output2", outputs.get(secondTag).getName());
    assertEquals("MyParDo.output3", outputs.get(thirdTag).getName());
    assertEquals("MyParDo.unwrittenOutput", outputs.get(unwrittenTag).getName());
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Example 50 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class ParDoTest method testMainOutputUnregisteredExplicitCoder.

/**
 * Runs a multi-output {@code ParDo} whose main output element type is {@code TestDummy}, with the
 * coder for that output supplied explicitly through {@code setCoder} before the pipeline runs.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
    final TupleTag<TestDummy> mainOutputTag = new TupleTag<TestDummy>("unregisteredMain");
    final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additionalOutput") {};
    TupleTagList additionalTags = TupleTagList.of(additionalOutputTag);

    PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));
    PCollectionTuple results =
        numbers.apply(
            ParDo.of(new MainOutputDummyFn(additionalOutputTag))
                .withOutputTags(mainOutputTag, additionalTags));

    // Explicitly attach the coder for the main output.
    results.get(mainOutputTag).setCoder(new TestDummyCoder());
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag)67 Test (org.junit.Test)44 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)27 KV (org.apache.beam.sdk.values.KV)16 PCollection (org.apache.beam.sdk.values.PCollection)15 Instant (org.joda.time.Instant)14 WindowedValue (org.apache.beam.sdk.util.WindowedValue)13 PValue (org.apache.beam.sdk.values.PValue)13 Category (org.junit.experimental.categories.Category)13 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)10 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)9 DoFn (org.apache.beam.sdk.transforms.DoFn)9 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)9 PCollectionView (org.apache.beam.sdk.values.PCollectionView)9 Matchers.containsString (org.hamcrest.Matchers.containsString)9 Map (java.util.Map)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 Pipeline (org.apache.beam.sdk.Pipeline)5 ImmutableList (com.google.common.collect.ImmutableList)4 ArrayList (java.util.ArrayList)4