use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.
the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.
@Test
public void windowCleanupScheduled() throws Exception {
// To test the factory, first we set up a pipeline and then we use the constructed
// pipeline to create the right parameters to pass to the factory
final String stateId = "my-state-id";
// For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
TupleTag<Integer> mainOutput = new TupleTag<>();
PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {
@StateId(stateId)
private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void process(ProcessContext c) {
}
}).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory(mockEvaluationContext);
AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
// Then there will be a digging down to the step context to get the state internals
when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
// Set up non-empty state. We don't mock + verify calls to clear() but instead
// check that state is actually empty. We musn't care how it is accomplished.
stateInternals.state(firstWindowNamespace, tag).write("first");
stateInternals.state(secondWindowNamespace, tag).write("second");
// A single bundle with some elements in the global window; it should register cleanup for the
// global window state merely by having the evaluator created. The cleanup logic does not
// depend on the window.
CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
// Merely creating the evaluator should suffice to register the cleanup callback
factory.forApplication(producingTransform, inputBundle);
ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
// Should actually clear the state for the first window
argumentCaptor.getValue().run();
assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
// Should actually clear the state for the second window
argumentCaptor.getValue().run();
assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.
the class StatefulDoFnRunnerTest method testLateDropping.
@Test
public void testLateDropping() throws Exception {
MetricsContainerImpl container = new MetricsContainerImpl("any");
MetricsEnvironment.setCurrentContainer(container);
timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
DoFn<KV<String, Integer>, Integer> fn = new MyDoFn();
DoFnRunner<KV<String, Integer>, Integer> runner = DoFnRunners.defaultStatefulDoFnRunner(fn, getDoFnRunner(fn), WINDOWING_STRATEGY, new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY), new StatefulDoFnRunner.StateInternalsStateCleaner<>(fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
runner.startBundle();
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
Instant timestamp = new Instant(0);
runner.processElement(WindowedValue.of(KV.of("hello", 1), timestamp, window, PaneInfo.NO_FIRING));
long droppedValues = container.getCounter(MetricName.named(StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER)).getCumulative().longValue();
assertEquals(1L, droppedValues);
runner.finishBundle();
}
use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.
the class TimerInternalsTest method testCompareTo.
@Test
public void testCompareTo() {
Instant firstTimestamp = new Instant(100);
Instant secondTimestamp = new Instant(200);
IntervalWindow firstWindow = new IntervalWindow(new Instant(0), firstTimestamp);
IntervalWindow secondWindow = new IntervalWindow(firstTimestamp, secondTimestamp);
Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
StateNamespace firstWindowNs = StateNamespaces.window(windowCoder, firstWindow);
StateNamespace secondWindowNs = StateNamespaces.window(windowCoder, secondWindow);
TimerData firstEventTime = TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.EVENT_TIME);
TimerData secondEventTime = TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.EVENT_TIME);
TimerData thirdEventTime = TimerData.of(secondWindowNs, secondTimestamp, TimeDomain.EVENT_TIME);
TimerData firstProcTime = TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.PROCESSING_TIME);
TimerData secondProcTime = TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME);
TimerData thirdProcTime = TimerData.of(secondWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME);
assertThat(firstEventTime, comparesEqualTo(TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.EVENT_TIME)));
assertThat(firstEventTime, lessThan(secondEventTime));
assertThat(secondEventTime, lessThan(thirdEventTime));
assertThat(firstEventTime, lessThan(thirdEventTime));
assertThat(secondProcTime, comparesEqualTo(TimerData.of(firstWindowNs, secondTimestamp, TimeDomain.PROCESSING_TIME)));
assertThat(firstProcTime, lessThan(secondProcTime));
assertThat(secondProcTime, lessThan(thirdProcTime));
assertThat(firstProcTime, lessThan(thirdProcTime));
assertThat(firstEventTime, not(comparesEqualTo(firstProcTime)));
assertThat(firstProcTime, not(comparesEqualTo(TimerData.of(firstWindowNs, firstTimestamp, TimeDomain.SYNCHRONIZED_PROCESSING_TIME))));
}
use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.
the class ParDoTest method testValueStateFixedWindows.
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateFixedWindows() {
final String stateId = "foo";
DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {
@StateId(stateId)
private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());
@ProcessElement
public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
Integer currentValue = MoreObjects.firstNonNull(state.read(), 0);
c.output(currentValue);
state.write(currentValue + 1);
}
};
IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(10));
IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(20));
PCollection<Integer> output = pipeline.apply(Create.timestamped(// first window
TimestampedValue.of(KV.of("hello", 7), new Instant(1)), TimestampedValue.of(KV.of("hello", 14), new Instant(2)), TimestampedValue.of(KV.of("hello", 21), new Instant(3)), // second window
TimestampedValue.of(KV.of("hello", 28), new Instant(11)), TimestampedValue.of(KV.of("hello", 35), new Instant(13)))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10)))).apply("Stateful ParDo", ParDo.of(fn));
PAssert.that(output).inWindow(firstWindow).containsInAnyOrder(0, 1, 2);
PAssert.that(output).inWindow(secondWindow).containsInAnyOrder(0, 1);
pipeline.run();
}
use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.
the class DoFnOperatorTest method testLateDroppingForStatefulFn.
@Test
public void testLateDroppingForStatefulFn() throws Exception {
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10)));
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
@StateId("state")
private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void processElement(ProcessContext context) {
context.output(context.element().toString());
}
};
WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder = WindowedValue.getFullCoder(VarIntCoder.of(), windowingStrategy.getWindowFn().windowCoder());
TupleTag<String> outputTag = new TupleTag<>("main-output");
DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
Collections.<PCollectionView<?>>emptyList(), /* side inputs */
PipelineOptionsFactory.as(FlinkPipelineOptions.class), VarIntCoder.of());
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<Integer>, Integer>() {
@Override
public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
return integerWindowedValue.getValue();
}
}, new CoderTypeInformation<>(VarIntCoder.of()));
testHarness.open();
testHarness.processWatermark(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
// this should not be late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
testHarness.processWatermark(9);
// this should still not be considered late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
testHarness.processWatermark(10);
// this should now be considered late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
testHarness.close();
}
Aggregations