Use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.
The class Partition, method expand.
/////////////////////////////////////////////////////////////////////////////
@Override
public PCollectionList<T> expand(PCollection<T> in) {
  final TupleTagList outputTags = partitionDoFn.getOutputTags();

  PCollectionTuple outputs =
      in.apply(
          ParDo.of(partitionDoFn)
              .withOutputTags(new TupleTag<Void>() {}, outputTags));

  PCollectionList<T> pcs = PCollectionList.empty(in.getPipeline());
  Coder<T> coder = in.getCoder();

  for (TupleTag<?> outputTag : outputTags.getAll()) {
    // All the tuple tags are actually TupleTag<T>
    // and all the collections are actually PCollection<T>.
    @SuppressWarnings("unchecked")
    TupleTag<T> typedOutputTag = (TupleTag<T>) outputTag;
    pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder));
  }
  return pcs;
}
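For context, here is a minimal sketch of how the Partition transform that owns this expand method is typically applied. The pipeline, element type, and partitioning logic below are illustrative assumptions, not taken from the Beam snippet above.

// Hypothetical usage sketch: split an input PCollection into 3 partitions by remainder.
// Assumes an existing Pipeline named "pipeline" and the usual Beam SDK imports.
PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3, 4, 5)));
PCollectionList<Integer> byRemainder =
    input.apply(
        Partition.of(
            3,
            new Partition.PartitionFn<Integer>() {
              @Override
              public int partitionFor(Integer elem, int numPartitions) {
                return elem % numPartitions; // route each element by elem mod 3
              }
            }));
PCollection<Integer> remainderZero = byRemainder.get(0); // one PCollection per partition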
Use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.
The class ParDoTest, method testParDoEmptyWithTaggedOutput.
@Test
@Category(ValidatesRunner.class)
public void testParDoEmptyWithTaggedOutput() {
  TupleTag<String> mainOutputTag = new TupleTag<String>("main") {};
  TupleTag<String> additionalOutputTag1 = new TupleTag<String>("additional1") {};
  TupleTag<String> additionalOutputTag2 = new TupleTag<String>("additional2") {};
  TupleTag<String> additionalOutputTag3 = new TupleTag<String>("additional3") {};
  TupleTag<String> additionalOutputTagUnwritten = new TupleTag<String>("unwrittenOutput") {};

  PCollectionTuple outputs =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(
              ParDo.of(
                      new TestDoFn(
                          Arrays.<PCollectionView<Integer>>asList(),
                          Arrays.asList(
                              additionalOutputTag1, additionalOutputTag2, additionalOutputTag3)))
                  .withOutputTags(
                      mainOutputTag,
                      TupleTagList.of(additionalOutputTag3)
                          .and(additionalOutputTag1)
                          .and(additionalOutputTagUnwritten)
                          .and(additionalOutputTag2)));

  List<Integer> inputs = Collections.emptyList();

  PAssert.that(outputs.get(mainOutputTag))
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs));
  PAssert.that(outputs.get(additionalOutputTag1))
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs).fromOutput(additionalOutputTag1));
  PAssert.that(outputs.get(additionalOutputTag2))
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs).fromOutput(additionalOutputTag2));
  PAssert.that(outputs.get(additionalOutputTag3))
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs).fromOutput(additionalOutputTag3));
  PAssert.that(outputs.get(additionalOutputTagUnwritten)).empty();

  pipeline.run();
}
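TestDoFn itself is not reproduced in this excerpt. A rough, hypothetical stand-in for a DoFn that writes each element to the main output and to every additional tag it was handed might look like the following; the class name, types, and messages are assumptions, not the actual TestDoFn.

// Hypothetical sketch of a DoFn emitting to a main output plus additional tagged outputs.
static class EmitToAllTagsFn extends DoFn<Integer, String> {
  private final List<TupleTag<String>> additionalTags;

  EmitToAllTagsFn(List<TupleTag<String>> additionalTags) {
    this.additionalTags = additionalTags;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    c.output("processing: " + c.element()); // main output
    for (TupleTag<String> tag : additionalTags) {
      c.output(tag, "processing: " + c.element() + " to " + tag.getId()); // tagged outputs
    }
  }
}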
Use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.
The class ParDoTest, method testTaggedOutputUnregisteredExplicitCoder.
@Test
public void testTaggedOutputUnregisteredExplicitCoder() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<TestDummy>("unregisteredSide");

  ParDo.MultiOutput<Integer, Integer> pardo =
      ParDo.of(new TaggedOutputDummyFn(additionalOutputTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag));
  PCollectionTuple outputTuple = input.apply(pardo);

  outputTuple.get(additionalOutputTag).setCoder(new TestDummyCoder());
  outputTuple.get(additionalOutputTag).apply(View.<TestDummy>asSingleton());

  assertEquals(new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder());

  // Check for crashes
  outputTuple.get(additionalOutputTag).finishSpecifyingOutput("ParDo", input, pardo);

  // Check for corruption
  assertEquals(new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder());
}
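A common alternative to calling setCoder on the tagged output, not exercised by this test, is to register the coder with the pipeline's CoderRegistry so it can be inferred; a minimal sketch, assuming the same TestDummy and TestDummyCoder types:

// Hypothetical alternative: register the coder once so outputs of type TestDummy
// can have their coder inferred instead of being set explicitly on each PCollection.
pipeline.getCoderRegistry().registerCoderForClass(TestDummy.class, new TestDummyCoder());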
Use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.
The class ParDoTest, method testMultiOutputAppliedMultipleTimesDifferentOutputs.
@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<Long> longs = pipeline.apply(GenerateSequence.from(0));

  TupleTag<Long> mainOut = new TupleTag<>();
  final TupleTag<String> valueAsString = new TupleTag<>();
  final TupleTag<Integer> valueAsInt = new TupleTag<>();

  DoFn<Long, Long> fn =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void processElement(ProcessContext cxt) {
          cxt.output(cxt.element());
          cxt.output(valueAsString, Long.toString(cxt.element()));
          cxt.output(valueAsInt, Long.valueOf(cxt.element()).intValue());
        }
      };

  ParDo.MultiOutput<Long, Long> parDo =
      ParDo.of(fn).withOutputTags(mainOut, TupleTagList.of(valueAsString).and(valueAsInt));

  PCollectionTuple firstApplication = longs.apply("first", parDo);
  PCollectionTuple secondApplication = longs.apply("second", parDo);

  assertThat(firstApplication, not(equalTo(secondApplication)));
  assertThat(
      firstApplication.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
  assertThat(
      secondApplication.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
}
Use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.
The class ParDoTest, method testMultiOutputChaining.
@Test
@Category(NeedsRunner.class)
public void testMultiOutputChaining() {
  PCollectionTuple filters =
      pipeline.apply(Create.of(Arrays.asList(3, 4, 5, 6))).apply(new MultiFilter());
  PCollection<Integer> by2 = filters.get(MultiFilter.BY2);
  PCollection<Integer> by3 = filters.get(MultiFilter.BY3);

  // Apply additional filters to each output.
  PCollection<Integer> by2then3 =
      by2.apply("Filter3sAgain", ParDo.of(new MultiFilter.FilterFn(3)));
  PCollection<Integer> by3then2 =
      by3.apply("Filter2sAgain", ParDo.of(new MultiFilter.FilterFn(2)));

  PAssert.that(by2then3).containsInAnyOrder(6);
  PAssert.that(by3then2).containsInAnyOrder(6);

  pipeline.run();
}
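MultiFilter is referenced here but not shown. A hypothetical reconstruction of a composite transform with the same shape, filtering the input into two collections and bundling them into a PCollectionTuple, could look like this; it is a sketch for illustration, not the actual MultiFilter source.

// Hypothetical reconstruction of a MultiFilter-style composite transform.
static class MultiFilterSketch extends PTransform<PCollection<Integer>, PCollectionTuple> {
  static final TupleTag<Integer> BY2 = new TupleTag<Integer>() {};
  static final TupleTag<Integer> BY3 = new TupleTag<Integer>() {};

  @Override
  public PCollectionTuple expand(PCollection<Integer> input) {
    PCollection<Integer> by2 =
        input.apply("Filter2s", Filter.by((Integer elem) -> elem % 2 == 0));
    PCollection<Integer> by3 =
        input.apply("Filter3s", Filter.by((Integer elem) -> elem % 3 == 0));
    // PCollectionTuple.of(...).and(...) builds the tuple handed back to the caller,
    // so downstream code can fetch each branch with get(BY2) / get(BY3).
    return PCollectionTuple.of(BY2, by2).and(BY3, by3);
  }
}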