Search in sources :

Example 16 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SplittableDoFnTest method testAdditionalOutput.

/**
 * Feeds the integers 0, 1 and 2 through {@code SDFWithAdditionalOutput} and asserts on the
 * contents of both the main output and the additional output.
 */
@Test
@Category({ ValidatesRunner.class, UsesSplittableParDo.class })
public void testAdditionalOutput() throws Exception {
    // Both tags are created as anonymous subclasses with explicit ids.
    TupleTag<String> mainTag = new TupleTag<String>("main") {
    };
    TupleTag<String> extraTag = new TupleTag<String>("additional") {
    };
    TupleTagList extraTags = TupleTagList.of(extraTag);

    PCollectionTuple outputs =
        p.apply("input", Create.of(0, 1, 2))
            .apply(ParDo.of(new SDFWithAdditionalOutput(extraTag)).withOutputTags(mainTag, extraTags));

    // One prefixed string per input element is expected on each output.
    PAssert.that(outputs.get(mainTag))
        .containsInAnyOrder(Arrays.asList("main:0", "main:1", "main:2"));
    PAssert.that(outputs.get(extraTag))
        .containsInAnyOrder(Arrays.asList("additional:0", "additional:1", "additional:2"));

    p.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 17 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project component-runtime by Talend.

the class BeamProcessorChainImpl method extractDoFn.

/**
 * Applies the transform of {@code step} to a temporary {@code CapturingPipeline} and returns what
 * a {@code CapturingPipeline.SinkExtractor} collects while traversing that pipeline.
 *
 * <p>When the step declares coders they are set on the transform's output: per tag for a
 * {@code PCollectionTuple}, or the first declared coder for a single {@code PCollection}.
 *
 * @param step the transform to apply, together with its optional coders.
 * @param coderRegistry registry to install on the temporary pipeline; skipped when {@code null}.
 * @return the outputs gathered by the sink extractor.
 */
private static Collection<DoFn<?, ?>> extractDoFn(final CapturingPipeline.TransformWithCoder step, final CoderRegistry coderRegistry) {
    final CapturingPipeline pipeline = new CapturingPipeline(PipelineOptionsFactory.create());
    if (coderRegistry != null) {
        pipeline.setCoderRegistry(coderRegistry);
    }

    // Root transform: a primitive, bounded, globally-windowed collection coded with TypingCoder,
    // used as the input the step's transform is applied to.
    final PTransform<PBegin, PCollection<Object>> source = new PTransform<PBegin, PCollection<Object>>() {

        @Override
        protected Coder<?> getDefaultOutputCoder() {
            return TypingCoder.INSTANCE;
        }

        @Override
        public PCollection<Object> expand(final PBegin input) {
            return PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), PCollection.IsBounded.BOUNDED, TypingCoder.INSTANCE);
        }
    };
    final POutput output = pipeline.apply(source).apply(step.getTransform());

    if (output instanceof PCollectionTuple && step.getCoders() != null) {
        // Multi-output case: set each declared coder on the collection registered under its key.
        final Map<TupleTag<?>, PCollection<?>> byTag = ((PCollectionTuple) output).getAll();
        step.getCoders().forEach((tag, coder) -> {
            final PCollection<?> tagged = byTag.get(tag);
            if (tagged == null) {
                return;
            }
            tagged.setCoder(Coder.class.cast(coder));
        });
    } else if (output instanceof PCollection && step.getCoders() != null && !step.getCoders().isEmpty()) {
        // Single-output case: only the first declared coder is used.
        ((PCollection) output).setCoder(Coder.class.cast(step.getCoders().values().iterator().next()));
    }

    final CapturingPipeline.SinkExtractor extractor = new CapturingPipeline.SinkExtractor();
    pipeline.traverseTopologically(extractor);
    return extractor.getOutputs();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) TupleTag(org.apache.beam.sdk.values.TupleTag) PBegin(org.apache.beam.sdk.values.PBegin) PCollection(org.apache.beam.sdk.values.PCollection) POutput(org.apache.beam.sdk.values.POutput) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform)

Example 18 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testEmpty.

/**
 * Starts and finishes a bundle without processing any element and checks that nothing was
 * recorded for the output tag.
 */
@Test
public void testEmpty() throws Exception {
    TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
    ListOutputManager manager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner = makeRunner(tag, manager);

    // No processElement call happens between start and finish.
    doFnRunner.startBundle();
    doFnRunner.finishBundle();

    assertEquals(0, manager.getOutput(tag).size());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) Test(org.junit.Test)

Example 19 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testFixedWindows.

/**
 * Sends four elements (three in window [0, 10), one in window [10, 20)) through the runner in a
 * single work item and checks that four outputs come back, one per element and in the order the
 * elements were added, each keeping its own value, timestamp and window.
 */
@Test
public void testFixedWindows() throws Exception {
    TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
    ListOutputManager manager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner = makeRunner(tag, manager);
    doFnRunner.startBundle();

    // Build one work item carrying a single message bundle with the four elements.
    WorkItem.Builder work = WorkItem.newBuilder();
    work.setKey(ByteString.copyFromUtf8(KEY));
    work.setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder bundle = work.addMessageBundlesBuilder();
    bundle.setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> stringCoder = StringUtf8Coder.of();
    addElement(bundle, Arrays.asList(window(0, 10)), new Instant(1), stringCoder, "v1");
    addElement(bundle, Arrays.asList(window(0, 10)), new Instant(2), stringCoder, "v2");
    addElement(bundle, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v0");
    addElement(bundle, Arrays.asList(window(10, 20)), new Instant(13), stringCoder, "v3");

    doFnRunner.processElement(createValue(work, stringCoder));
    doFnRunner.finishBundle();

    List<WindowedValue<KV<String, Iterable<String>>>> emitted = manager.getOutput(tag);
    assertEquals(4, emitted.size());

    // Expected outputs, index-aligned with the order in which the elements were added above.
    String[] expectedValues = { "v1", "v2", "v0", "v3" };
    long[] expectedMillis = { 1, 2, 0, 13 };
    BoundedWindow[] expectedWindows = { window(0, 10), window(0, 10), window(0, 10), window(10, 20) };
    for (int i = 0; i < expectedValues.length; i++) {
        WindowedValue<KV<String, Iterable<String>>> item = emitted.get(i);
        assertEquals(KEY, item.getValue().getKey());
        assertThat(item.getValue().getValue(), Matchers.containsInAnyOrder(expectedValues[i]));
        assertEquals(new Instant(expectedMillis[i]), item.getTimestamp());
        assertThat(item.getWindows(), Matchers.<BoundedWindow>contains(expectedWindows[i]));
    }
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 20 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindowsAndLateData.

/**
 * Runs a grouping function built for sliding windows (size 20 ms, period 10 ms, earliest
 * timestamp combiner) over two bundles: the first delivers three elements while the mocked input
 * watermark is 15, the second delivers watermark timers for the three windows with the watermark
 * at 30. The test then checks the three emitted panes and the dropped-due-to-lateness counter.
 */
@Test
public void testSlidingWindowsAndLateData() throws Exception {
    // Install a metrics container so the lateness counter can be read back at the end.
    MetricsContainerImpl metrics = new MetricsContainerImpl("step");
    MetricsEnvironment.setCurrentContainer(metrics);

    TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
    ListOutputManager manager = new ListOutputManager();
    WindowingStrategy<? super String, IntervalWindow> strategy =
        WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
            .withTimestampCombiner(TimestampCombiner.EARLIEST);
    GroupAlsoByWindowFn<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> groupFn =
        StreamingGroupAlsoByWindowsDoFns.createForIterable(
            strategy, new StepContextStateInternalsFactory<String>(stepContext), StringUtf8Coder.of());
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
        makeRunner(tag, manager, strategy, fn(groupFn));

    // First bundle: three elements arrive while the input watermark is reported as 15.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15));
    doFnRunner.startBundle();
    WorkItem.Builder elementsWork = WorkItem.newBuilder();
    elementsWork.setKey(ByteString.copyFromUtf8(KEY));
    elementsWork.setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder bundle = elementsWork.addMessageBundlesBuilder();
    bundle.setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> stringCoder = StringUtf8Coder.of();
    addElement(bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(5), stringCoder, "v1");
    addElement(bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(2), stringCoder, "v0");
    addElement(bundle, Arrays.asList(window(0, 20), window(10, 30)), new Instant(15), stringCoder, "v2");
    doFnRunner.processElement(createValue(elementsWork, stringCoder));
    doFnRunner.finishBundle();

    // Second bundle: watermark timers for all three windows, with the watermark moved to 30.
    doFnRunner.startBundle();
    WorkItem.Builder timersWork = WorkItem.newBuilder();
    timersWork.setKey(ByteString.copyFromUtf8(KEY));
    timersWork.setWorkToken(WORK_TOKEN);
    addTimer(timersWork, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(timersWork, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
    addTimer(timersWork, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
    doFnRunner.processElement(createValue(timersWork, stringCoder));
    doFnRunner.finishBundle();

    List<WindowedValue<KV<String, Iterable<String>>>> emitted = manager.getOutput(tag);
    assertThat(emitted.size(), equalTo(3));
    // One pane per window; the pane for window(-10, 10) is expected to be empty.
    assertThat(
        emitted,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), emptyIterable()),
                equalTo(window(-10, 10).maxTimestamp()),
                equalTo(window(-10, 10))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                equalTo(new Instant(2)),
                equalTo(window(0, 20))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v2")),
                equalTo(new Instant(15)),
                equalTo(window(10, 30)))));

    // NOTE(review): the two dropped values are presumably the copies of "v0" and "v1" assigned to
    // window(-10, 10) — confirm against LateDataDroppingDoFnRunner.
    long dropped =
        metrics
            .getCounter(MetricName.named(LateDataDroppingDoFnRunner.class, LateDataDroppingDoFnRunner.DROPPED_DUE_TO_LATENESS))
            .getCumulative()
            .longValue();
    assertThat(dropped, equalTo(2L));
}
Also used : LateDataDroppingDoFnRunner(org.apache.beam.runners.core.LateDataDroppingDoFnRunner) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag)185 Test (org.junit.Test)100 WindowedValue (org.apache.beam.sdk.util.WindowedValue)54 KV (org.apache.beam.sdk.values.KV)54 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)49 PCollection (org.apache.beam.sdk.values.PCollection)42 DoFn (org.apache.beam.sdk.transforms.DoFn)32 Instant (org.joda.time.Instant)32 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)30 Map (java.util.Map)29 Pipeline (org.apache.beam.sdk.Pipeline)29 PCollectionView (org.apache.beam.sdk.values.PCollectionView)29 HashMap (java.util.HashMap)27 Coder (org.apache.beam.sdk.coders.Coder)26 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)25 Matchers.containsString (org.hamcrest.Matchers.containsString)25 List (java.util.List)24 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)23 KvCoder (org.apache.beam.sdk.coders.KvCoder)22 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)22