Use of org.apache.beam.sdk.values.PCollectionTuple in the Apache Beam project.
Class ParDoTest, method testMultiOutputAppliedMultipleTimesDifferentOutputs.
/**
 * Applies a single multi-output {@link ParDo} instance to the same input twice, under two
 * different step names, and checks that the two resulting {@link PCollectionTuple}s are not
 * equal while each one exposes the same three output tags.
 *
 * <p>The pipeline is never run in this test; abandoned-node enforcement is disabled up front
 * (presumably for that reason).
 */
@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));

  TupleTag<Long> mainTag = new TupleTag<>();
  final TupleTag<String> stringTag = new TupleTag<>();
  final TupleTag<Integer> intTag = new TupleTag<>();

  // Emits every element to the main output and, converted, to both additional outputs.
  DoFn<Long, Long> fanOutFn =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          Long value = context.element();
          context.output(value);
          context.output(stringTag, Long.toString(value));
          context.output(intTag, value.intValue());
        }
      };

  ParDo.MultiOutput<Long, Long> transform =
      ParDo.of(fanOutFn).withOutputTags(mainTag, TupleTagList.of(stringTag).and(intTag));

  PCollectionTuple firstResult = input.apply("first", transform);
  PCollectionTuple secondResult = input.apply("second", transform);

  // Two applications of the same transform must yield distinct tuples...
  assertThat(firstResult, not(equalTo(secondResult)));

  // ...that are nevertheless keyed by the same set of tags.
  assertThat(
      firstResult.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainTag, stringTag, intTag));
  assertThat(
      secondResult.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainTag, stringTag, intTag));
}
Use of org.apache.beam.sdk.values.PCollectionTuple in the Apache Beam project.
Class ParDoTest, method testMultiOutputChaining.
/**
 * Chains an additional filtering {@link ParDo} onto each output of {@code MultiFilter} and
 * asserts that, for the input 3, 4, 5, 6, both chains contain only 6.
 */
@Test
@Category(NeedsRunner.class)
public void testMultiOutputChaining() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(3, 4, 5, 6)));
  PCollectionTuple filtered = numbers.apply(new MultiFilter());

  // Apply a second FilterFn to each branch: 3 on the BY2 branch, 2 on the BY3 branch.
  PCollection<Integer> twosThenThrees =
      filtered
          .get(MultiFilter.BY2)
          .apply("Filter3sAgain", ParDo.of(new MultiFilter.FilterFn(3)));
  PCollection<Integer> threesThenTwos =
      filtered
          .get(MultiFilter.BY3)
          .apply("Filter2sAgain", ParDo.of(new MultiFilter.FilterFn(2)));

  PAssert.that(twosThenThrees).containsInAnyOrder(6);
  PAssert.that(threesThenTwos).containsInAnyOrder(6);

  pipeline.run();
}
Use of org.apache.beam.sdk.values.PCollectionTuple in the Apache Beam project.
Class ReplacementOutputsTest, method taggedExtraReplacementThrows.
/**
 * Verifies that {@code ReplacementOutputs.tagged} rejects a replacement tuple carrying a tag
 * ({@code moreIntsTag}) that is absent from the original outputs.
 *
 * <p>The expected {@link IllegalArgumentException} message must contain "Missing original
 * output" as well as the string forms of the extra tag and of its replacement value.
 */
@Test
public void taggedExtraReplacementThrows() {
  PCollectionTuple originalOutputs = PCollectionTuple.of(intsTag, ints).and(strsTag, strs);

  // Register the expectations before anything on the replacement side is constructed.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Missing original output");
  thrown.expectMessage(moreIntsTag.toString());
  thrown.expectMessage(moreReplacementInts.toString());

  PCollectionTuple replacementOutputs =
      PCollectionTuple.of(strsTag, replacementStrs)
          .and(moreIntsTag, moreReplacementInts)
          .and(intsTag, replacementInts);

  ReplacementOutputs.tagged(originalOutputs.expand(), replacementOutputs);
}
Use of org.apache.beam.sdk.values.PCollectionTuple in the Apache Beam project.
Class PTransformTranslationTest, method multiMultiParDo.
/**
 * Builds an {@link AppliedPTransform} named "MultiParDoInAndOut" around a multi-output
 * {@link ParDo} that also consumes a singleton side input.
 *
 * @param pipeline the pipeline that the side input, main input and ParDo are applied to
 * @return the applied transform; its inputs are the ParDo's additional inputs combined with the
 *     expansion of the main input, and its outputs are the expansion of the ParDo's result
 */
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> sideInput =
      pipeline.apply(Create.of("foo")).apply(View.<String>asSingleton());
  PCollection<Long> mainInput = pipeline.apply(GenerateSequence.from(0));

  // Anonymous subclasses, as in the original, so each tag carries its element type.
  TupleTag<KV<Long, String>> mainOutputTag = new TupleTag<KV<Long, String>>() {
  };
  TupleTag<KV<String, Long>> additionalOutputTag = new TupleTag<KV<String, Long>>() {
  };

  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(sideInput)
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag));
  PCollectionTuple result = mainInput.apply(parDo);

  // Additional (side) inputs go in first, then the expansion of the main input.
  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  allInputs.putAll(parDo.getAdditionalInputs());
  allInputs.putAll(mainInput.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", allInputs, result.expand(), parDo, pipeline);
}
Use of org.apache.beam.sdk.values.PCollectionTuple in the Apache Beam project.
Class CreateStreamTest, method testMultiOutputParDo.
/**
 * Exercises a {@link ParDo} with multiple outputs in a streaming pipeline.
 *
 * <p>Kept as a test for https://issues.apache.org/jira/browse/BEAM-2029, because
 * {@link org.apache.beam.sdk.testing.ValidatesRunner} tests do not currently run for the Spark
 * runner in streaming mode.
 */
@Test
public void testMultiOutputParDo() throws IOException {
  Instant start = new Instant(0);

  // One empty batch, then a single batch holding 1, 2 and 3, all stamped with `start`.
  CreateStream<Integer> stream =
      CreateStream.of(VarIntCoder.of(), batchDuration())
          .emptyBatch()
          .advanceWatermarkForNextBatch(start.plus(Duration.standardMinutes(5)))
          .nextBatch(
              TimestampedValue.of(1, start),
              TimestampedValue.of(2, start),
              TimestampedValue.of(3, start))
          .advanceNextBatchWatermarkToInfinity();
  PCollection<Integer> numbers = p.apply(stream);

  final TupleTag<Integer> mainTag = new TupleTag<>();
  final TupleTag<Integer> additionalTag = new TupleTag<>();

  // Emits each element unchanged on the main output and element + 1 on the additional output.
  DoFn<Integer, Integer> splitFn =
      new DoFn<Integer, Integer>() {
        @SuppressWarnings("unused")
        @ProcessElement
        public void process(ProcessContext context) {
          Integer value = context.element();
          context.output(value);
          context.output(additionalTag, value + 1);
        }
      };

  PCollectionTuple results =
      numbers.apply(ParDo.of(splitFn).withOutputTags(mainTag, TupleTagList.of(additionalTag)));

  PCollection<Integer> mainOutput = results.get(mainTag).setCoder(VarIntCoder.of());
  PCollection<Integer> additionalOutput = results.get(additionalTag).setCoder(VarIntCoder.of());

  PAssert.that(mainOutput).containsInAnyOrder(1, 2, 3);
  PAssert.that(additionalOutput).containsInAnyOrder(2, 3, 4);

  p.run();
}
Aggregations