Search in sources :

Example 1 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.

@Test
public void windowCleanupScheduled() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    TupleTag<Integer> mainOutput = new TupleTag<>();
    PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void process(ProcessContext c) {
        }
    }).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory(mockEvaluationContext);
    AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We musn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // A single bundle with some elements in the global window; it should register cleanup for the
    // global window state merely by having the evaluator created. The cleanup logic does not
    // depend on the window.
    CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
    // Merely creating the evaluator should suffice to register the cleanup callback
    factory.forApplication(producingTransform, inputBundle);
    ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the first window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the second window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.anyString(org.mockito.Matchers.anyString) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) StateNamespace(org.apache.beam.runners.core.StateNamespace) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 2 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StatefulDoFnRunnerTest method testLateDropping.

@Test
public void testLateDropping() throws Exception {
    MetricsContainerImpl container = new MetricsContainerImpl("any");
    MetricsEnvironment.setCurrentContainer(container);
    timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
    timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
    DoFn<KV<String, Integer>, Integer> fn = new MyDoFn();
    DoFnRunner<KV<String, Integer>, Integer> runner = DoFnRunners.defaultStatefulDoFnRunner(fn, getDoFnRunner(fn), WINDOWING_STRATEGY, new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY), new StatefulDoFnRunner.StateInternalsStateCleaner<>(fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
    runner.startBundle();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
    Instant timestamp = new Instant(0);
    runner.processElement(WindowedValue.of(KV.of("hello", 1), timestamp, window, PaneInfo.NO_FIRING));
    long droppedValues = container.getCounter(MetricName.named(StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER)).getCumulative().longValue();
    assertEquals(1L, droppedValues);
    runner.finishBundle();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 3 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class TimerInternalsTest method testCompareTo.

@Test
public void testCompareTo() {
    Instant firstTimestamp = new Instant(100);
    Instant secondTimestamp = new Instant(200);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), firstTimestamp);
    IntervalWindow secondWindow = new IntervalWindow(firstTimestamp, secondTimestamp);
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    StateNamespace firstWindowNs = StateNamespaces.window(windowCoder, firstWindow);
    StateNamespace secondWindowNs = StateNamespaces.window(windowCoder, secondWindow);
    TimerData firstEventTime = TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.EVENT_TIME);
    TimerData secondEventTime = TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.EVENT_TIME);
    TimerData thirdEventTime = TimerData.of(secondWindowNs, secondTimestamp, TimeDomain.EVENT_TIME);
    TimerData firstProcTime = TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.PROCESSING_TIME);
    TimerData secondProcTime = TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME);
    TimerData thirdProcTime = TimerData.of(secondWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME);
    assertThat(firstEventTime, comparesEqualTo(TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.EVENT_TIME)));
    assertThat(firstEventTime, lessThan(secondEventTime));
    assertThat(secondEventTime, lessThan(thirdEventTime));
    assertThat(firstEventTime, lessThan(thirdEventTime));
    assertThat(secondProcTime, comparesEqualTo(TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME)));
    assertThat(firstProcTime, lessThan(secondProcTime));
    assertThat(secondProcTime, lessThan(thirdProcTime));
    assertThat(firstProcTime, lessThan(thirdProcTime));
    assertThat(firstEventTime, not(comparesEqualTo(firstProcTime)));
    assertThat(firstProcTime, not(comparesEqualTo(TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.SYNCHRONIZED_PROCESSING_TIME))));
}
Also used : Instant(org.joda.time.Instant) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 4 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class ParDoTest method testValueStateFixedWindows.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateFixedWindows() {
    final String stateId = "foo";
    DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
            Integer currentValue = MoreObjects.firstNonNull(state.read(), 0);
            c.output(currentValue);
            state.write(currentValue + 1);
        }
    };
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(20));
    PCollection<Integer> output = pipeline.apply(Create.timestamped(// first window
    TimestampedValue.of(KV.of("hello", 7), new Instant(1)), TimestampedValue.of(KV.of("hello", 14), new Instant(2)), TimestampedValue.of(KV.of("hello", 21), new Instant(3)), // second window
    TimestampedValue.of(KV.of("hello", 28), new Instant(11)), TimestampedValue.of(KV.of("hello", 35), new Instant(13)))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10)))).apply("Stateful ParDo", ParDo.of(fn));
    PAssert.that(output).inWindow(firstWindow).containsInAnyOrder(0, 1, 2);
    PAssert.that(output).inWindow(secondWindow).containsInAnyOrder(0, 1);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 5 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class DoFnOperatorTest method testLateDroppingForStatefulFn.

@Test
public void testLateDroppingForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10)));
    DoFn<Integer, String> fn = new DoFn<Integer, String>() {

        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(context.element().toString());
        }
    };
    WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder = WindowedValue.getFullCoder(VarIntCoder.of(), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<String> outputTag = new TupleTag<>("main-output");
    DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), VarIntCoder.of());
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<Integer>, Integer>() {

        @Override
        public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
            return integerWindowedValue.getValue();
        }
    }, new CoderTypeInformation<>(VarIntCoder.of()));
    testHarness.open();
    testHarness.processWatermark(0);
    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
    // this should not be late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(9);
    // this should still not be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(10);
    // this should now be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
    testHarness.close();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) Test(org.junit.Test)

Aggregations

IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)238 Test (org.junit.Test)214 Instant (org.joda.time.Instant)213 WindowedValue (org.apache.beam.sdk.util.WindowedValue)67 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)56 KV (org.apache.beam.sdk.values.KV)56 Duration (org.joda.time.Duration)33 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)32 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)20 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)20 ArrayList (java.util.ArrayList)16 TupleTag (org.apache.beam.sdk.values.TupleTag)16 HashMap (java.util.HashMap)14 PCollectionView (org.apache.beam.sdk.values.PCollectionView)14 Category (org.junit.experimental.categories.Category)13 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)12 FixedWindows (org.apache.beam.sdk.transforms.windowing.FixedWindows)12 ByteBuffer (java.nio.ByteBuffer)11 Map (java.util.Map)11 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11