Search in sources :

Example 1 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.

@Test
public void windowCleanupScheduled() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    TupleTag<Integer> mainOutput = new TupleTag<>();
    PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void process(ProcessContext c) {
        }
    }).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory(mockEvaluationContext);
    AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We musn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // A single bundle with some elements in the global window; it should register cleanup for the
    // global window state merely by having the evaluator created. The cleanup logic does not
    // depend on the window.
    CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
    // Merely creating the evaluator should suffice to register the cleanup callback
    factory.forApplication(producingTransform, inputBundle);
    ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the first window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the second window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.anyString(org.mockito.Matchers.anyString) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) StateNamespace(org.apache.beam.runners.core.StateNamespace) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 2 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testEmpty.

@Test
public void testEmpty() throws Exception {
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner = makeRunner(outputTag, outputManager);
    runner.startBundle();
    runner.finishBundle();
    List<?> result = outputManager.getOutput(outputTag);
    assertEquals(0, result.size());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) Test(org.junit.Test)

Example 3 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testFixedWindows.

@Test
public void testFixedWindows() throws Exception {
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner = makeRunner(outputTag, outputManager);
    runner.startBundle();
    WorkItem.Builder workItem = WorkItem.newBuilder();
    workItem.setKey(ByteString.copyFromUtf8(KEY));
    workItem.setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder messageBundle = workItem.addMessageBundlesBuilder();
    messageBundle.setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> valueCoder = StringUtf8Coder.of();
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(1), valueCoder, "v1");
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(2), valueCoder, "v2");
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(0), valueCoder, "v0");
    addElement(messageBundle, Arrays.asList(window(10, 20)), new Instant(13), valueCoder, "v3");
    runner.processElement(createValue(workItem, valueCoder));
    runner.finishBundle();
    List<WindowedValue<KV<String, Iterable<String>>>> result = outputManager.getOutput(outputTag);
    assertEquals(4, result.size());
    WindowedValue<KV<String, Iterable<String>>> item0 = result.get(0);
    assertEquals(KEY, item0.getValue().getKey());
    assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v1"));
    assertEquals(new Instant(1), item0.getTimestamp());
    assertThat(item0.getWindows(), Matchers.<BoundedWindow>contains(window(0, 10)));
    WindowedValue<KV<String, Iterable<String>>> item1 = result.get(1);
    assertEquals(KEY, item1.getValue().getKey());
    assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder("v2"));
    assertEquals(new Instant(2), item1.getTimestamp());
    assertThat(item1.getWindows(), Matchers.<BoundedWindow>contains(window(0, 10)));
    WindowedValue<KV<String, Iterable<String>>> item2 = result.get(2);
    assertEquals(KEY, item2.getValue().getKey());
    assertThat(item2.getValue().getValue(), Matchers.containsInAnyOrder("v0"));
    assertEquals(new Instant(0), item2.getTimestamp());
    assertThat(item2.getWindows(), Matchers.<BoundedWindow>contains(window(0, 10)));
    WindowedValue<KV<String, Iterable<String>>> item3 = result.get(3);
    assertEquals(KEY, item3.getValue().getKey());
    assertThat(item3.getValue().getValue(), Matchers.containsInAnyOrder("v3"));
    assertEquals(new Instant(13), item3.getTimestamp());
    assertThat(item3.getWindows(), Matchers.<BoundedWindow>contains(window(10, 20)));
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 4 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindowsAndLateData.

@Test
public void testSlidingWindowsAndLateData() throws Exception {
    MetricsContainerImpl container = new MetricsContainerImpl("step");
    MetricsEnvironment.setCurrentContainer(container);
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    WindowingStrategy<? super String, IntervalWindow> windowingStrategy = WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))).withTimestampCombiner(TimestampCombiner.EARLIEST);
    GroupAlsoByWindowFn<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> fn = StreamingGroupAlsoByWindowsDoFns.createForIterable(windowingStrategy, new StepContextStateInternalsFactory<String>(stepContext), StringUtf8Coder.of());
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner = makeRunner(outputTag, outputManager, windowingStrategy, fn);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15));
    runner.startBundle();
    WorkItem.Builder workItem1 = WorkItem.newBuilder();
    workItem1.setKey(ByteString.copyFromUtf8(KEY));
    workItem1.setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder messageBundle = workItem1.addMessageBundlesBuilder();
    messageBundle.setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> valueCoder = StringUtf8Coder.of();
    addElement(messageBundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(5), valueCoder, "v1");
    addElement(messageBundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(2), valueCoder, "v0");
    addElement(messageBundle, Arrays.asList(window(0, 20), window(10, 30)), new Instant(15), valueCoder, "v2");
    runner.processElement(createValue(workItem1, valueCoder));
    runner.finishBundle();
    runner.startBundle();
    WorkItem.Builder workItem2 = WorkItem.newBuilder();
    workItem2.setKey(ByteString.copyFromUtf8(KEY));
    workItem2.setWorkToken(WORK_TOKEN);
    addTimer(workItem2, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(workItem2, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
    addTimer(workItem2, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
    runner.processElement(createValue(workItem2, valueCoder));
    runner.finishBundle();
    List<WindowedValue<KV<String, Iterable<String>>>> result = outputManager.getOutput(outputTag);
    assertThat(result.size(), equalTo(3));
    assertThat(result, containsInAnyOrder(WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), emptyIterable()), equalTo(window(-10, 10).maxTimestamp()), equalTo(window(-10, 10))), WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")), equalTo(new Instant(2)), equalTo(window(0, 20))), WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v2")), equalTo(new Instant(15)), equalTo(window(10, 30)))));
    long droppedValues = container.getCounter(MetricName.named(LateDataDroppingDoFnRunner.class, LateDataDroppingDoFnRunner.DROPPED_DUE_TO_LATENESS)).getCumulative().longValue();
    assertThat(droppedValues, equalTo(2L));
}
Also used : LateDataDroppingDoFnRunner(org.apache.beam.runners.core.LateDataDroppingDoFnRunner) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 5 with KeyedWorkItem

use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testFixedWindows.

@Test
public void testFixedWindows() throws Exception {
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputManager = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner = makeRunner(outputTag, outputManager, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    runner.startBundle();
    WorkItem.Builder workItem1 = WorkItem.newBuilder();
    workItem1.setKey(ByteString.copyFromUtf8(KEY));
    workItem1.setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder messageBundle = workItem1.addMessageBundlesBuilder();
    messageBundle.setSourceComputationId(SOURCE_COMPUTATION_ID);
    Coder<String> valueCoder = StringUtf8Coder.of();
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(1), valueCoder, "v1");
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(2), valueCoder, "v2");
    addElement(messageBundle, Arrays.asList(window(0, 10)), new Instant(0), valueCoder, "v0");
    addElement(messageBundle, Arrays.asList(window(10, 20)), new Instant(13), valueCoder, "v3");
    runner.processElement(createValue(workItem1, valueCoder));
    runner.finishBundle();
    runner.startBundle();
    WorkItem.Builder workItem2 = WorkItem.newBuilder();
    workItem2.setKey(ByteString.copyFromUtf8(KEY));
    workItem2.setWorkToken(WORK_TOKEN);
    addTimer(workItem2, window(0, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(workItem2, window(10, 20), new Instant(19), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
    runner.processElement(createValue(workItem2, valueCoder));
    runner.finishBundle();
    List<WindowedValue<KV<String, Iterable<String>>>> result = outputManager.getOutput(outputTag);
    assertThat(result.size(), equalTo(2));
    assertThat(result, containsInAnyOrder(WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")), equalTo(window(0, 10).maxTimestamp()), equalTo(window(0, 10))), WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v3")), equalTo(window(10, 20).maxTimestamp()), equalTo(window(10, 20)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Aggregations

KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)20 TupleTag (org.apache.beam.sdk.values.TupleTag)16 KV (org.apache.beam.sdk.values.KV)15 Instant (org.joda.time.Instant)14 Test (org.junit.Test)13 WindowedValue (org.apache.beam.sdk.util.WindowedValue)11 ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager)9 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)9 InputMessageBundle (org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle)6 WorkItem (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem)6 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollection (org.apache.beam.sdk.values.PCollection)4 OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)3 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)3 KvCoder (org.apache.beam.sdk.coders.KvCoder)3 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)3 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)3 Collection (java.util.Collection)2 List (java.util.List)2