Search in sources:

Example 21 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testFixedWindows.

/**
 * Runs the group-also-by-window runner with 10 ms fixed windows.
 *
 * <p>The first bundle carries four elements: three assigned to {@code window(0, 10)} and one
 * assigned to {@code window(10, 20)}. The second bundle carries one WATERMARK timer per window
 * while the mocked input watermark reports 20. The test expects exactly two outputs, one per
 * window, each stamped with that window's maximum timestamp.
 */
@Test
public void testFixedWindows() throws Exception {
    Coder<String> stringCoder = StringUtf8Coder.of();
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
            makeRunner(
                    outputTag,
                    outputManager,
                    WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

    // First bundle: deliver the data elements while the mocked input watermark is at 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder dataWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder inputBundle =
            dataWorkItem.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    addElement(inputBundle, Arrays.asList(window(0, 10)), new Instant(1), stringCoder, "v1");
    addElement(inputBundle, Arrays.asList(window(0, 10)), new Instant(2), stringCoder, "v2");
    addElement(inputBundle, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v0");
    addElement(inputBundle, Arrays.asList(window(10, 20)), new Instant(13), stringCoder, "v3");
    runner.processElement(createValue(dataWorkItem, stringCoder));
    runner.finishBundle();

    // Second bundle: deliver one watermark timer per window with the mocked watermark at 20.
    runner.startBundle();
    WorkItem.Builder timerWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    addTimer(timerWorkItem, window(0, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(10, 20), new Instant(19), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
    runner.processElement(createValue(timerWorkItem, stringCoder));
    runner.finishBundle();

    // One grouped output per window, in any order.
    List<WindowedValue<KV<String, Iterable<String>>>> outputs =
            outputManager.getOutput(outputTag);
    assertThat(outputs.size(), equalTo(2));
    assertThat(
            outputs,
            containsInAnyOrder(
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                            equalTo(window(0, 10).maxTimestamp()),
                            equalTo(window(0, 10))),
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v3")),
                            equalTo(window(10, 20).maxTimestamp()),
                            equalTo(window(10, 20)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 22 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSessions.

/**
 * Runs the group-also-by-window runner with session windows (10 ms gap) and the EARLIEST
 * timestamp combiner.
 *
 * <p>The first bundle carries four elements in the proto-windows {@code window(0, 10)},
 * {@code window(5, 15)}, {@code window(15, 25)} and {@code window(3, 13)}. The second bundle
 * carries WATERMARK timers for {@code window(0, 15)} and {@code window(15, 25)} while the
 * mocked input watermark reports 25. The test expects two outputs: "v0", "v1", "v2" in
 * {@code window(0, 15)} at timestamp 0, and "v3" in {@code window(15, 25)} at timestamp 15.
 */
@Test
public void testSessions() throws Exception {
    Coder<String> stringCoder = StringUtf8Coder.of();
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
            makeRunner(
                    outputTag,
                    outputManager,
                    WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
                            .withTimestampCombiner(TimestampCombiner.EARLIEST));

    // First bundle: deliver the data elements while the mocked input watermark is at 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder dataWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder inputBundle =
            dataWorkItem.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    addElement(inputBundle, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v1");
    addElement(inputBundle, Arrays.asList(window(5, 15)), new Instant(5), stringCoder, "v2");
    addElement(inputBundle, Arrays.asList(window(15, 25)), new Instant(15), stringCoder, "v3");
    addElement(inputBundle, Arrays.asList(window(3, 13)), new Instant(3), stringCoder, "v0");
    runner.processElement(createValue(dataWorkItem, stringCoder));
    runner.finishBundle();

    // Second bundle: deliver the watermark timers with the mocked watermark at 25.
    runner.startBundle();
    WorkItem.Builder timerWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    // No timer is sent for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timerWorkItem, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(15, 25), new Instant(24), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    runner.processElement(createValue(timerWorkItem, stringCoder));
    runner.finishBundle();

    // One grouped output per session, in any order.
    List<WindowedValue<KV<String, Iterable<String>>>> outputs =
            outputManager.getOutput(outputTag);
    assertThat(outputs.size(), equalTo(2));
    assertThat(
            outputs,
            containsInAnyOrder(
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                            equalTo(new Instant(0)),
                            equalTo(window(0, 15))),
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v3")),
                            equalTo(new Instant(15)),
                            equalTo(window(15, 25)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 23 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSessionsCombine.

/**
 * Runs the combining variant of the group-also-by-window runner with session windows (10 ms
 * gap) and a {@code SumLongs} combine function.
 *
 * <p>The first bundle carries the values 1, 2, 3 and 4 in the proto-windows
 * {@code window(0, 10)}, {@code window(5, 15)}, {@code window(15, 25)} and
 * {@code window(3, 13)}. The second bundle carries WATERMARK timers for {@code window(0, 15)}
 * and {@code window(15, 25)} while the mocked input watermark reports 25. The test expects two
 * outputs: 7 in {@code window(0, 15)} and 3 in {@code window(15, 25)}, each stamped with its
 * window's maximum timestamp.
 */
@Test
public void testSessionsCombine() throws Exception {
    Coder<Long> longCoder = BigEndianLongCoder.of();
    TupleTag<KV<String, Long>> outputTag = new TupleTag<>();
    CombineFn<Long, ?, Long> sumFn = new SumLongs();
    CoderRegistry coderRegistry = CoderRegistry.createDefault();
    AppliedCombineFn<String, Long, ?, Long> appliedSumFn =
            AppliedCombineFn.withInputCoder(
                    sumFn,
                    coderRegistry,
                    KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()));
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, Long>, KV<String, Long>> runner =
            makeRunner(
                    outputTag,
                    outputManager,
                    WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))),
                    appliedSumFn);

    // First bundle: deliver the data elements while the mocked input watermark is at 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder dataWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder inputBundle =
            dataWorkItem.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    addElement(inputBundle, Arrays.asList(window(0, 10)), new Instant(0), longCoder, 1L);
    addElement(inputBundle, Arrays.asList(window(5, 15)), new Instant(5), longCoder, 2L);
    addElement(inputBundle, Arrays.asList(window(15, 25)), new Instant(15), longCoder, 3L);
    addElement(inputBundle, Arrays.asList(window(3, 13)), new Instant(3), longCoder, 4L);
    runner.processElement(createValue(dataWorkItem, longCoder));
    runner.finishBundle();

    // Second bundle: deliver the watermark timers with the mocked watermark at 25.
    runner.startBundle();
    WorkItem.Builder timerWorkItem =
            WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
    // No timer is sent for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timerWorkItem, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(15, 25), new Instant(24), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    runner.processElement(createValue(timerWorkItem, longCoder));
    runner.finishBundle();

    // One combined output per session, in any order.
    List<WindowedValue<KV<String, Long>>> outputs = outputManager.getOutput(outputTag);
    assertThat(outputs.size(), equalTo(2));
    assertThat(
            outputs,
            containsInAnyOrder(
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), equalTo(7L)),
                            equalTo(window(0, 15).maxTimestamp()),
                            equalTo(window(0, 15))),
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), equalTo(3L)),
                            equalTo(window(15, 25).maxTimestamp()),
                            equalTo(window(15, 25)))));
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 24 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SimpleParDoFnTest method testStateTracking.

/**
 * Checks which execution state the tracker reports while the user DoFn's lifecycle methods run.
 *
 * <p>The DoFn asserts the start state inside {@code @StartBundle} and the process state inside
 * {@code @ProcessElement}. The test only calls {@code processElement} on the ParDoFn; the DoFn
 * itself asserts that its start-bundle method has already run by then.
 */
@Test
public void testStateTracking() throws Exception {
    ExecutionStateTracker tracker = ExecutionStateTracker.newForTest();
    TestOperationContext operationContext =
            TestOperationContext.create(
                    new CounterSet(),
                    NameContextsForTests.nameContextForTest(),
                    new MetricsContainerImpl(NameContextsForTests.ORIGINAL_NAME),
                    tracker);

    // Local DoFn that checks the tracker's current state from inside each lifecycle method.
    class StateTestingDoFn extends DoFn<Integer, String> {

        // Set once startBundle() has run, so processElement() can verify the call order.
        private boolean startCalled = false;

        @StartBundle
        public void startBundle() throws Exception {
            startCalled = true;
            assertThat(tracker.getCurrentState(), equalTo(operationContext.getStartState()));
        }

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            assertThat(startCalled, equalTo(true));
            assertThat(tracker.getCurrentState(), equalTo(operationContext.getProcessState()));
        }
    }

    StateTestingDoFn doFn = new StateTestingDoFn();
    DoFnInfo<?, ?> doFnInfo =
            DoFnInfo.forFn(
                    doFn,
                    WindowingStrategy.globalDefault(),
                    null, /* side input views */
                    null, /* input coder */
                    MAIN_OUTPUT,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap());
    ParDoFn parDoFn =
            new SimpleParDoFn<>(
                    options,
                    DoFnInstanceManagers.singleInstance(doFnInfo),
                    NullSideInputReader.empty(),
                    MAIN_OUTPUT,
                    ImmutableMap.of(MAIN_OUTPUT, 0, new TupleTag<>("declared"), 1),
                    BatchModeExecutionContext.forTesting(
                                    options, operationContext.counterFactory(), "testStage")
                            .getStepContext(operationContext),
                    operationContext,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap(),
                    SimpleDoFnRunnerFactory.INSTANCE);

    // State sampling has to behave correctly even though initialization happens lazily.
    // Resources close in reverse order: the process scope first, then the tracker.
    try (Closeable trackerScope = tracker.activate();
            Closeable processScope = operationContext.enterProcess()) {
        parDoFn.processElement(WindowedValue.valueInGlobalWindow(5));
    }
}
Also used : MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DoFn(org.apache.beam.sdk.transforms.DoFn) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) Closeable(java.io.Closeable) TupleTag(org.apache.beam.sdk.values.TupleTag) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) Test(org.junit.Test)

Example 25 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SimpleParDoFnTest method testOutputReceivers.

/**
 * Checks that each output of a multi-output DoFn is delivered to its own receiver.
 *
 * <p>The ParDoFn is built with the main output at index 0 and the tags "tag1", "tag2" and
 * "tag3" at indices 1 to 3, and is started with four receivers in that order. After three
 * elements (3, 42, 666) and {@code finishBundle}, every receiver must hold the three
 * "processing" values followed by "finished"; the tagged receivers expect them prefixed with
 * the tag name.
 */
@Test
public void testOutputReceivers() throws Exception {
    TestDoFn doFn =
            new TestDoFn(
                    ImmutableList.of(
                            new TupleTag<>("tag1"),
                            new TupleTag<>("tag2"),
                            new TupleTag<>("tag3")));
    DoFnInfo<?, ?> doFnInfo =
            DoFnInfo.forFn(
                    doFn,
                    WindowingStrategy.globalDefault(),
                    null, /* side input views */
                    null, /* input coder */
                    MAIN_OUTPUT,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap());
    TestReceiver mainReceiver = new TestReceiver();
    TestReceiver tag1Receiver = new TestReceiver();
    TestReceiver tag2Receiver = new TestReceiver();
    TestReceiver tag3Receiver = new TestReceiver();
    ParDoFn parDoFn =
            new SimpleParDoFn<>(
                    options,
                    DoFnInstanceManagers.cloningPool(doFnInfo, options),
                    new EmptySideInputReader(),
                    MAIN_OUTPUT,
                    ImmutableMap.of(
                            MAIN_OUTPUT, 0,
                            new TupleTag<String>("tag1"), 1,
                            new TupleTag<String>("tag2"), 2,
                            new TupleTag<String>("tag3"), 3),
                    BatchModeExecutionContext.forTesting(options, "testStage")
                            .getStepContext(operationContext),
                    operationContext,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap(),
                    SimpleDoFnRunnerFactory.INSTANCE);

    // Receivers are passed in the same order as the output indices declared above.
    parDoFn.startBundle(mainReceiver, tag1Receiver, tag2Receiver, tag3Receiver);
    parDoFn.processElement(WindowedValue.valueInGlobalWindow(3));
    parDoFn.processElement(WindowedValue.valueInGlobalWindow(42));
    parDoFn.processElement(WindowedValue.valueInGlobalWindow(666));
    parDoFn.finishBundle();

    assertArrayEquals(
            new Object[] {
                WindowedValue.valueInGlobalWindow("processing: 3"),
                WindowedValue.valueInGlobalWindow("processing: 42"),
                WindowedValue.valueInGlobalWindow("processing: 666"),
                WindowedValue.valueInGlobalWindow("finished")
            },
            mainReceiver.receivedElems.toArray());
    assertArrayEquals(
            new Object[] {
                WindowedValue.valueInGlobalWindow("tag1: processing: 3"),
                WindowedValue.valueInGlobalWindow("tag1: processing: 42"),
                WindowedValue.valueInGlobalWindow("tag1: processing: 666"),
                WindowedValue.valueInGlobalWindow("tag1: finished")
            },
            tag1Receiver.receivedElems.toArray());
    assertArrayEquals(
            new Object[] {
                WindowedValue.valueInGlobalWindow("tag2: processing: 3"),
                WindowedValue.valueInGlobalWindow("tag2: processing: 42"),
                WindowedValue.valueInGlobalWindow("tag2: processing: 666"),
                WindowedValue.valueInGlobalWindow("tag2: finished")
            },
            tag2Receiver.receivedElems.toArray());
    assertArrayEquals(
            new Object[] {
                WindowedValue.valueInGlobalWindow("tag3: processing: 3"),
                WindowedValue.valueInGlobalWindow("tag3: processing: 42"),
                WindowedValue.valueInGlobalWindow("tag3: processing: 666"),
                WindowedValue.valueInGlobalWindow("tag3: finished")
            },
            tag3Receiver.receivedElems.toArray());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) Test(org.junit.Test)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag): 185; Test (org.junit.Test): 100; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 54; KV (org.apache.beam.sdk.values.KV): 54; PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 49; PCollection (org.apache.beam.sdk.values.PCollection): 42; DoFn (org.apache.beam.sdk.transforms.DoFn): 32; Instant (org.joda.time.Instant): 32; SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions): 30; Map (java.util.Map): 29; Pipeline (org.apache.beam.sdk.Pipeline): 29; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 29; HashMap (java.util.HashMap): 27; Coder (org.apache.beam.sdk.coders.Coder): 26; StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue): 25; Matchers.containsString (org.hamcrest.Matchers.containsString): 25; List (java.util.List): 24; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 23; KvCoder (org.apache.beam.sdk.coders.KvCoder): 22; KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 22