use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testParDoEmptyWithTaggedOutput.
/**
 * Applies a multi-output ParDo to an empty input and asserts on every declared output:
 * the main output, the three additional outputs handed to the DoFn, and one extra declared
 * output ("unwrittenOutput") that is not handed to the DoFn and must come out empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoEmptyWithTaggedOutput() {
  // Tags are declared as anonymous subclasses, exactly as in the original declarations.
  TupleTag<String> mainOutputTag = new TupleTag<String>("main") {};
  TupleTag<String> additionalOutputTag1 = new TupleTag<String>("additional1") {};
  TupleTag<String> additionalOutputTag2 = new TupleTag<String>("additional2") {};
  TupleTag<String> additionalOutputTag3 = new TupleTag<String>("additional3") {};
  TupleTag<String> additionalOutputTagUnwritten = new TupleTag<String>("unwrittenOutput") {};

  PCollection<Integer> emptyInput = pipeline.apply(Create.empty(VarIntCoder.of()));

  // The DoFn is given tags 1..3 only; it has no side inputs.
  List<TupleTag<String>> writtenTags =
      Arrays.asList(additionalOutputTag1, additionalOutputTag2, additionalOutputTag3);
  TestDoFn fn = new TestDoFn(Arrays.<PCollectionView<Integer>>asList(), writtenTags);

  // The declared additional outputs are registered in a different order than the DoFn's list
  // and include the tag the DoFn was not given.
  TupleTagList declaredAdditionalOutputs =
      TupleTagList.of(additionalOutputTag3)
          .and(additionalOutputTag1)
          .and(additionalOutputTagUnwritten)
          .and(additionalOutputTag2);

  PCollectionTuple outputs =
      emptyInput.apply(ParDo.of(fn).withOutputTags(mainOutputTag, declaredAdditionalOutputs));

  List<Integer> inputs = Collections.emptyList();
  PAssert.that(outputs.get(mainOutputTag))
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs));
  for (TupleTag<String> writtenTag : writtenTags) {
    PAssert.that(outputs.get(writtenTag))
        .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs).fromOutput(writtenTag));
  }
  PAssert.that(outputs.get(additionalOutputTagUnwritten)).empty();

  pipeline.run();
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testTaggedOutputUnregisteredExplicitCoder.
/**
 * Sets an explicit {@code TestDummyCoder} on the additional output of a multi-output ParDo and
 * verifies that {@code getCoder()} returns an equal coder both before and after
 * {@code finishSpecifyingOutput} is invoked on that output.
 */
@Test
public void testTaggedOutputUnregisteredExplicitCoder() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<TestDummy>("unregisteredSide");
  ParDo.MultiOutput<Integer, Integer> pardo =
      ParDo.of(new TaggedOutputDummyFn(additionalOutputTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag));
  PCollectionTuple outputTuple = input.apply(pardo);

  // Every step below operates on the same tagged output.
  PCollection<TestDummy> additionalOutput = outputTuple.get(additionalOutputTag);
  additionalOutput.setCoder(new TestDummyCoder());
  additionalOutput.apply(View.<TestDummy>asSingleton());
  assertEquals(new TestDummyCoder(), additionalOutput.getCoder());

  // Check for crashes.
  additionalOutput.finishSpecifyingOutput("ParDo", input, pardo);

  // Check for corruption: the explicitly set coder must still be reported.
  assertEquals(new TestDummyCoder(), additionalOutput.getCoder());
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ParDoTest method testMultiOutputAppliedMultipleTimesDifferentOutputs.
/**
 * Applies one {@code ParDo.MultiOutput} instance twice to the same input under different names
 * and asserts that the two resulting tuples are not equal, while each exposes exactly the main
 * tag and the two additional tags.
 */
@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
  pipeline.enableAbandonedNodeEnforcement(false);
  PCollection<Long> numbers = pipeline.apply(GenerateSequence.from(0));

  // Tags are created in the same order as before; they carry no explicit ids.
  TupleTag<Long> mainOut = new TupleTag<>();
  final TupleTag<String> valueAsString = new TupleTag<>();
  final TupleTag<Integer> valueAsInt = new TupleTag<>();

  DoFn<Long, Long> fanOutFn =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          Long element = context.element();
          context.output(element);
          context.output(valueAsString, Long.toString(element));
          context.output(valueAsInt, element.intValue());
        }
      };

  ParDo.MultiOutput<Long, Long> multiOutput =
      ParDo.of(fanOutFn)
          .withOutputTags(mainOut, TupleTagList.of(valueAsString).and(valueAsInt));

  PCollectionTuple firstApplication = numbers.apply("first", multiOutput);
  PCollectionTuple secondApplication = numbers.apply("second", multiOutput);

  assertThat(firstApplication, not(equalTo(secondApplication)));
  assertThat(
      firstApplication.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
  assertThat(
      secondApplication.getAll().keySet(),
      Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class ReplacementOutputs method tagged.
/**
 * Pairs every value produced by {@code replacement.expand()} with the original value that
 * carries the same tag.
 *
 * @param original the original outputs, keyed by tag
 * @param replacement the replacement output whose expanded entries are matched by tag
 * @return a map from each replacement value to a {@code ReplacementOutput} built from the
 *     original tagged value and the replacement tagged value
 * @throws IllegalArgumentException (assuming {@code checkArgument} is the Guava precondition —
 *     confirm against the file's static imports) if a replacement tag has no original
 *     counterpart, or if some original tag has no replacement
 */
public static Map<PValue, ReplacementOutput> tagged(Map<TupleTag<?>, PValue> original, POutput replacement) {
  // Index the originals by tag as TaggedPValues.
  Map<TupleTag<?>, TaggedPValue> originalsByTag = new HashMap<>();
  for (Map.Entry<TupleTag<?>, PValue> entry : original.entrySet()) {
    TupleTag<?> tag = entry.getKey();
    originalsByTag.put(tag, TaggedPValue.of(tag, entry.getValue()));
  }

  ImmutableMap.Builder<PValue, ReplacementOutput> pairs = ImmutableMap.builder();
  // Starts as every original tag; each tag is removed once a replacement for it is seen.
  Set<TupleTag<?>> unreplacedTags = new HashSet<>(originalsByTag.keySet());

  for (Map.Entry<TupleTag<?>, PValue> entry : replacement.expand().entrySet()) {
    TupleTag<?> tag = entry.getKey();
    PValue replacementValue = entry.getValue();
    TaggedPValue originalTagged = originalsByTag.get(tag);
    checkArgument(
        originalTagged != null,
        "Missing original output for Tag %s and Value %s Between original %s and replacement %s",
        tag,
        replacementValue,
        original,
        replacement.expand());
    pairs.put(
        replacementValue,
        ReplacementOutput.of(originalTagged, TaggedPValue.of(tag, replacementValue)));
    unreplacedTags.remove(tag);
  }

  ImmutableMap<PValue, ReplacementOutput> result = pairs.build();
  checkArgument(
      unreplacedTags.isEmpty(),
      "Missing replacement for tags %s. Encountered tags: %s",
      unreplacedTags,
      result.keySet());
  return result;
}
use of org.apache.beam.sdk.values.TupleTag in project beam by apache.
the class DoFnOperatorTest method testLateDroppingForStatefulFn.
/**
 * Drives a DoFnOperator wrapping a DoFn that declares a state cell through a keyed test
 * harness, advancing the watermark to 0, 9 and 10 and feeding one element into the same
 * 10 ms window after each step. The first two elements are expected in the output; after
 * the watermark reaches 10 the output is expected to be empty.
 */
@Test
public void testLateDroppingForStatefulFn() throws Exception {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(new Duration(10)));

  DoFn<Integer, String> fn =
      new DoFn<Integer, String>() {
        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(context.element().toString());
        }
      };

  WindowedValue.FullWindowedValueCoder<Integer> inputCoder =
      WindowedValue.getFullCoder(
          VarIntCoder.of(), windowingStrategy.getWindowFn().windowCoder());
  TupleTag<String> mainOutput = new TupleTag<>("main-output");

  DoFnOperator<Integer, String, WindowedValue<String>> operator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          mainOutput,
          Collections.<TupleTag<?>>emptyList(),
          new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(),
          windowingStrategy,
          new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
          Collections.<PCollectionView<?>>emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          VarIntCoder.of());

  // Each element is keyed by its own value.
  KeySelector<WindowedValue<Integer>, Integer> keyByValue =
      new KeySelector<WindowedValue<Integer>, Integer>() {
        @Override
        public Integer getKey(WindowedValue<Integer> element) throws Exception {
          return element.getValue();
        }
      };
  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          operator, keyByValue, new CoderTypeInformation<>(VarIntCoder.of()));

  harness.open();

  Instant timestamp = new Instant(0);
  PaneInfo pane = PaneInfo.NO_FIRING;
  IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10));

  // Watermark at 0: the element must not be treated as late.
  harness.processWatermark(0);
  harness.processElement(new StreamRecord<>(WindowedValue.of(13, timestamp, window, pane)));
  assertThat(
      this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("13", timestamp, window, pane)));
  harness.getOutput().clear();

  // Watermark at 9: still not late.
  harness.processWatermark(9);
  harness.processElement(new StreamRecord<>(WindowedValue.of(17, timestamp, window, pane)));
  assertThat(
      this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("17", timestamp, window, pane)));
  harness.getOutput().clear();

  // Watermark at 10: the element is now late, so nothing may be emitted.
  harness.processWatermark(10);
  harness.processElement(new StreamRecord<>(WindowedValue.of(17, timestamp, window, pane)));
  assertThat(
      this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()),
      emptyIterable());

  harness.close();
}
Aggregations