use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testValueStateTaggedOutput.
/**
 * Checks that a stateful {@link DoFn} can emit to both its main output and an additional tagged
 * output.
 *
 * <p>The function reads an integer from {@link ValueState} (treating a missing value as 0),
 * emits the value it read, and then writes back that value plus one. Even values are sent to the
 * main output ({@code evenTag}); odd values are sent to {@code oddTag}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
  // Must stay a compile-time constant: it is used as an annotation value below.
  final String stateId = "foo";

  final TupleTag<Integer> evenTag = new TupleTag<Integer>() {
  };
  final TupleTag<Integer> oddTag = new TupleTag<Integer>() {
  };

  DoFn<KV<String, Integer>, Integer> countingFn =
      new DoFn<KV<String, Integer>, Integer>() {
        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState =
            StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(stateId) ValueState<Integer> counter) {
          Integer seenSoFar = MoreObjects.firstNonNull(counter.read(), 0);
          if (seenSoFar % 2 != 0) {
            // Odd counts are routed to the additional output.
            context.output(oddTag, seenSoFar);
          } else {
            // Even counts go to the main output.
            context.output(seenSoFar);
          }
          counter.write(seenSoFar + 1);
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(
          Create.of(
              KV.of("hello", 42),
              KV.of("hello", 97),
              KV.of("hello", 84),
              KV.of("goodbye", 33),
              KV.of("hello", 859),
              KV.of("goodbye", 83945)));

  PCollectionTuple output =
      input.apply(ParDo.of(countingFn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

  PCollection<Integer> evens = output.get(evenTag);
  PCollection<Integer> odds = output.get(oddTag);

  // Four "hello" elements contribute 0 and 2; two "goodbye" elements contribute a single 0.
  PAssert.that(evens).containsInAnyOrder(0, 2, 0);
  // Four "hello" elements contribute 1 and 3; two "goodbye" elements contribute a single 1.
  PAssert.that(odds).containsInAnyOrder(1, 3, 1);

  pipeline.run();
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testParDoWithEmptyTaggedOutput.
/**
 * Checks that a multi-output {@link ParDo} applied to an empty input yields empty collections
 * for the main output and for each of the two additional outputs.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
  TupleTag<String> mainTag = new TupleTag<String>("main") {
  };
  TupleTag<String> firstExtraTag = new TupleTag<String>("additional1") {
  };
  TupleTag<String> secondExtraTag = new TupleTag<String>("additional2") {
  };

  TupleTagList extraTags = TupleTagList.of(firstExtraTag).and(secondExtraTag);
  TestNoOutputDoFn noOutputFn = new TestNoOutputDoFn();

  PCollectionTuple outputs =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(ParDo.of(noOutputFn).withOutputTags(mainTag, extraTags));

  // Every output, main and additional alike, is expected to contain no elements.
  PAssert.that(outputs.get(mainTag)).empty();
  PAssert.that(outputs.get(firstExtraTag)).empty();
  PAssert.that(outputs.get(secondExtraTag)).empty();

  pipeline.run();
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testMainOutputApplyTaggedOutputNoCoder.
/**
 * Regression test: applying a transform to the main output must not crash because the
 * additional output does not have a coder yet.
 *
 * <p>The additional output's coder is deliberately set only after the main output has been
 * consumed by a downstream transform.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputApplyTaggedOutputNoCoder() {
  final TupleTag<TestDummy> mainOutputTag = new TupleTag<TestDummy>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<TestDummy>("additionalOutput");

  // Emits every element to both the main output and the additional output.
  DoFn<TestDummy, TestDummy> duplicateToBothOutputs =
      new DoFn<TestDummy, TestDummy>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          TestDummy element = context.element();
          context.output(element);
          context.output(additionalOutputTag, element);
        }
      };

  // Downstream consumer of the main output; emits the constant 1 per element.
  DoFn<TestDummy, Integer> emitOne =
      new DoFn<TestDummy, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(1);
        }
      };

  PCollectionTuple tuple =
      pipeline
          .apply(Create.of(new TestDummy()).withCoder(TestDummyCoder.of()))
          .apply(
              ParDo.of(duplicateToBothOutputs)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));

  // Before the fix, tuple.get(mainOutputTag).apply(...) would indirectly trigger
  // tuple.get(additionalOutputTag).finishSpecifyingOutput(), which would crash
  // on a missing coder.
  tuple
      .get(mainOutputTag)
      .setCoder(TestDummyCoder.of())
      .apply("Output1", ParDo.of(emitOne));

  // Only now does the additional output receive its coder.
  tuple.get(additionalOutputTag).setCoder(TestDummyCoder.of());

  pipeline.run();
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testParDoWithTaggedOutputName.
/**
 * Checks the names given to the outputs of a named multi-output {@link ParDo}: each output is
 * expected to be called {@code "<transform name>.<tag id>"}, including an output tag that the
 * function is not configured to write to.
 *
 * <p>The pipeline is not run in this test; only the output names are inspected.
 */
@Test
public void testParDoWithTaggedOutputName() {
  pipeline.enableAbandonedNodeEnforcement(false);

  TupleTag<String> mainTag = new TupleTag<String>("main") {
  };
  TupleTag<String> firstTag = new TupleTag<String>("output1") {
  };
  TupleTag<String> secondTag = new TupleTag<String>("output2") {
  };
  TupleTag<String> thirdTag = new TupleTag<String>("output3") {
  };
  TupleTag<String> unwrittenTag = new TupleTag<String>("unwrittenOutput") {
  };

  // The function is given no side inputs and the three additional tags it is handed.
  TestDoFn fn =
      new TestDoFn(
          Arrays.<PCollectionView<Integer>>asList(),
          Arrays.asList(firstTag, secondTag, thirdTag));

  // Registration order intentionally differs from the numeric order of the tag ids.
  TupleTagList additionalTags =
      TupleTagList.of(thirdTag).and(firstTag).and(unwrittenTag).and(secondTag);

  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(Arrays.asList(3, -42, 666)))
          .setName("MyInput")
          .apply("MyParDo", ParDo.of(fn).withOutputTags(mainTag, additionalTags));

  assertEquals("MyParDo.main", outputs.get(mainTag).getName());
  assertEquals("MyParDo.output1", outputs.get(firstTag).getName());
  assertEquals("MyParDo.output2", outputs.get(secondTag).getName());
  assertEquals("MyParDo.output3", outputs.get(thirdTag).getName());
  assertEquals("MyParDo.unwrittenOutput", outputs.get(unwrittenTag).getName());
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testMainOutputUnregisteredExplicitCoder.
/**
 * Runs a pipeline whose main output tag is created without an anonymous subclass and whose
 * coder is supplied explicitly via {@code setCoder} before the pipeline is run.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
  final TupleTag<TestDummy> mainOutputTag = new TupleTag<TestDummy>("unregisteredMain");
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additionalOutput") {
  };

  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  MainOutputDummyFn dummyFn = new MainOutputDummyFn(additionalOutputTag);
  TupleTagList additionalTags = TupleTagList.of(additionalOutputTag);

  PCollectionTuple outputTuple =
      numbers.apply(ParDo.of(dummyFn).withOutputTags(mainOutputTag, additionalTags));

  // The main output's coder is set by hand rather than left to inference.
  outputTuple.get(mainOutputTag).setCoder(new TestDummyCoder());

  pipeline.run();
}
Aggregations