use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.
/**
 * Verifies that merely creating a stateful ParDo evaluator schedules a cleanup callback for each
 * window present in the input bundle, and that running a window's callback clears the state stored
 * for that window while leaving the other window's state untouched.
 */
@Test
public void windowCleanupScheduled() throws Exception {
  // To test the factory, first we set up a pipeline and then we use the constructed
  // pipeline to create the right parameters to pass to the factory.
  final String stateId = "my-state-id";

  // For consistency, window the input into FixedWindows. The input bundle itself is fabricated
  // further below.
  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(Create.of(KV.of("hello", 1), KV.of("hello", 2)))
          .apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));

  TupleTag<Integer> mainOutput = new TupleTag<>();
  PCollection<Integer> produced =
      input
          .apply(
              new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(
                  ParDo.of(
                          new DoFn<KV<String, Integer>, Integer>() {
                            @StateId(stateId)
                            private final StateSpec<ValueState<String>> spec =
                                StateSpecs.value(StringUtf8Coder.of());

                            @ProcessElement
                            public void process(ProcessContext c) {}
                          })
                      .withOutputTags(mainOutput, TupleTagList.empty())))
          .get(mainOutput)
          .setCoder(VarIntCoder.of());

  // Diamond operator instead of a raw type, so the assignment is not an unchecked conversion.
  StatefulParDoEvaluatorFactory<String, Integer, Integer> factory =
      new StatefulParDoEvaluatorFactory<>(mockEvaluationContext);

  // The raw cast is deliberate: the producer is handed back with a less specific generic
  // signature than the one the factory is called with.
  AppliedPTransform<
          PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>,
          PCollectionTuple,
          StatefulParDo<String, Integer, Integer>>
      producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);

  // Then there will be a digging down to the step context to get the state internals.
  when(mockEvaluationContext.getExecutionContext(
          eq(producingTransform), Mockito.<StructuralKey>any()))
      .thenReturn(mockExecutionContext);
  when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));

  StateNamespace firstWindowNamespace =
      StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
  StateNamespace secondWindowNamespace =
      StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
  StateTag<ValueState<String>> tag =
      StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));

  // Set up non-empty state. We don't mock + verify calls to clear() but instead
  // check that state is actually empty. We mustn't care how it is accomplished.
  stateInternals.state(firstWindowNamespace, tag).write("first");
  stateInternals.state(secondWindowNamespace, tag).write("second");

  // A single bundle with one element in each of the two fabricated windows. Merely creating
  // the evaluator for it should register a cleanup callback for each window's state.
  CommittedBundle<KV<String, Integer>> inputBundle =
      BUNDLE_FACTORY
          .createBundle(input)
          .add(
              WindowedValue.of(
                  KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING))
          .add(
              WindowedValue.of(
                  KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING))
          .commit(Instant.now());

  // Merely creating the evaluator should suffice to register the cleanup callback.
  factory.forApplication(producingTransform, inputBundle);

  ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
  verify(mockEvaluationContext)
      .scheduleAfterWindowExpiration(
          eq(producingTransform),
          eq(firstWindow),
          Mockito.<WindowingStrategy<?, ?>>any(),
          argumentCaptor.capture());

  // Should actually clear the state for the first window, and only for the first window.
  argumentCaptor.getValue().run();
  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));

  // The captor is reused; getValue() below is expected to yield the callback captured by this
  // second verification.
  verify(mockEvaluationContext)
      .scheduleAfterWindowExpiration(
          eq(producingTransform),
          eq(secondWindow),
          Mockito.<WindowingStrategy<?, ?>>any(),
          argumentCaptor.capture());

  // Should actually clear the state for the second window.
  argumentCaptor.getValue().run();
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testEmpty.
/** Checks that a bundle which is started and finished without any element produces no output. */
@Test
public void testEmpty() throws Exception {
  TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
      makeRunner(tag, collected);

  // Open and immediately close a bundle; no element is ever processed.
  doFnRunner.startBundle();
  doFnRunner.finishBundle();

  List<?> emitted = collected.getOutput(tag);
  assertEquals(0, emitted.size());
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowsReshuffleDoFnTest method testFixedWindows.
/**
 * Feeds one work item carrying four elements (three in window [0, 10), one in window [10, 20))
 * through the runner and checks that four outputs come back, one per input element, in input
 * order, each keeping its own value, timestamp and window.
 */
@Test
public void testFixedWindows() throws Exception {
  TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
      makeRunner(tag, collected);
  Coder<String> stringCoder = StringUtf8Coder.of();

  doFnRunner.startBundle();

  // Build a single work item whose message bundle holds all four elements.
  WorkItem.Builder work =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder messages =
      work.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(1), stringCoder, "v1");
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(2), stringCoder, "v2");
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v0");
  addElement(messages, Arrays.asList(window(10, 20)), new Instant(13), stringCoder, "v3");

  doFnRunner.processElement(createValue(work, stringCoder));
  doFnRunner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(tag);
  assertEquals(4, emitted.size());

  // Expected outputs, listed in the same order in which the elements were added above.
  String[] expectedValues = {"v1", "v2", "v0", "v3"};
  long[] expectedMillis = {1, 2, 0, 13};
  int[] expectedWindowStart = {0, 0, 0, 10};
  int[] expectedWindowEnd = {10, 10, 10, 20};

  for (int i = 0; i < expectedValues.length; i++) {
    WindowedValue<KV<String, Iterable<String>>> item = emitted.get(i);
    assertEquals(KEY, item.getValue().getKey());
    assertThat(item.getValue().getValue(), Matchers.containsInAnyOrder(expectedValues[i]));
    assertEquals(new Instant(expectedMillis[i]), item.getTimestamp());
    assertThat(
        item.getWindows(),
        Matchers.<BoundedWindow>contains(window(expectedWindowStart[i], expectedWindowEnd[i])));
  }
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindowsAndLateData.
/**
 * Runs group-also-by-window with sliding windows (size 20 ms, period 10 ms, earliest-timestamp
 * combiner) over two bundles. The first bundle delivers three elements while the mocked input
 * watermark is 15; the second delivers watermark timers for the three windows with the watermark
 * at 30. Expects three outputs and a dropped-due-to-lateness counter of 2.
 */
@Test
public void testSlidingWindowsAndLateData() throws Exception {
  // Install a metrics container so the late-data counter can be read back at the end.
  MetricsContainerImpl metrics = new MetricsContainerImpl("step");
  MetricsEnvironment.setCurrentContainer(metrics);

  TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();

  SlidingWindows slidingWindowFn = SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10));
  WindowingStrategy<? super String, IntervalWindow> strategy =
      WindowingStrategy.of(slidingWindowFn).withTimestampCombiner(TimestampCombiner.EARLIEST);

  GroupAlsoByWindowFn<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> gabwFn =
      StreamingGroupAlsoByWindowsDoFns.createForIterable(
          strategy, new StepContextStateInternalsFactory<String>(stepContext), StringUtf8Coder.of());
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
      makeRunner(tag, collected, strategy, gabwFn);

  // First bundle: three elements, processed with the input watermark reported as 15.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15));
  doFnRunner.startBundle();

  WorkItem.Builder elementsWork = WorkItem.newBuilder();
  elementsWork.setKey(ByteString.copyFromUtf8(KEY));
  elementsWork.setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder messages = elementsWork.addMessageBundlesBuilder();
  messages.setSourceComputationId(SOURCE_COMPUTATION_ID);

  Coder<String> stringCoder = StringUtf8Coder.of();
  addElement(
      messages, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(5), stringCoder, "v1");
  addElement(
      messages, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(2), stringCoder, "v0");
  addElement(
      messages, Arrays.asList(window(0, 20), window(10, 30)), new Instant(15), stringCoder, "v2");

  doFnRunner.processElement(createValue(elementsWork, stringCoder));
  doFnRunner.finishBundle();

  // Second bundle: one watermark timer per window, processed with the watermark reported as 30.
  doFnRunner.startBundle();

  WorkItem.Builder timersWork = WorkItem.newBuilder();
  timersWork.setKey(ByteString.copyFromUtf8(KEY));
  timersWork.setWorkToken(WORK_TOKEN);
  addTimer(timersWork, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
  addTimer(timersWork, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
  addTimer(timersWork, window(10, 30), new Instant(29), Timer.Type.WATERMARK);

  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
  doFnRunner.processElement(createValue(timersWork, stringCoder));
  doFnRunner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(tag);
  assertThat(emitted.size(), equalTo(3));

  // Window [-10, 10) is expected to come out empty; the other two windows carry the values
  // listed below, each stamped with the earliest timestamp among them.
  assertThat(
      emitted,
      containsInAnyOrder(
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), emptyIterable()),
              equalTo(window(-10, 10).maxTimestamp()),
              equalTo(window(-10, 10))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(new Instant(2)),
              equalTo(window(0, 20))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v2")),
              equalTo(new Instant(15)),
              equalTo(window(10, 30)))));

  // Presumably the two dropped values are v0 and v1 as assigned to window [-10, 10), which
  // ends before the watermark of 15 used for the first bundle.
  MetricName droppedMetric =
      MetricName.named(
          LateDataDroppingDoFnRunner.class, LateDataDroppingDoFnRunner.DROPPED_DUE_TO_LATENESS);
  long droppedCount = metrics.getCounter(droppedMetric).getCumulative().longValue();
  assertThat(droppedCount, equalTo(2L));
}
use of org.apache.beam.runners.core.KeyedWorkItem in project beam by apache.
the class StreamingGroupAlsoByWindowFnsTest method testFixedWindows.
/**
 * Runs group-also-by-window with 10 ms fixed windows over two bundles. The first bundle delivers
 * four elements while the mocked input watermark is 0; the second delivers watermark timers for
 * windows [0, 10) and [10, 20) with the watermark at 20. Expects one grouped output per window,
 * each stamped with its window's maximum timestamp.
 */
@Test
public void testFixedWindows() throws Exception {
  TupleTag<KV<String, Iterable<String>>> tag = new TupleTag<>();
  ListOutputManager collected = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
      makeRunner(tag, collected, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

  // First bundle: four elements, processed with the input watermark reported as 0.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
  doFnRunner.startBundle();

  WorkItem.Builder elementsWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder messages =
      elementsWork.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);

  Coder<String> stringCoder = StringUtf8Coder.of();
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(1), stringCoder, "v1");
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(2), stringCoder, "v2");
  addElement(messages, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v0");
  addElement(messages, Arrays.asList(window(10, 20)), new Instant(13), stringCoder, "v3");

  doFnRunner.processElement(createValue(elementsWork, stringCoder));
  doFnRunner.finishBundle();

  // Second bundle: one watermark timer per window, processed with the watermark reported as 20.
  doFnRunner.startBundle();

  WorkItem.Builder timersWork =
      WorkItem.newBuilder().setKey(ByteString.copyFromUtf8(KEY)).setWorkToken(WORK_TOKEN);
  addTimer(timersWork, window(0, 10), new Instant(9), Timer.Type.WATERMARK);
  addTimer(timersWork, window(10, 20), new Instant(19), Timer.Type.WATERMARK);

  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
  doFnRunner.processElement(createValue(timersWork, stringCoder));
  doFnRunner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(tag);
  assertThat(emitted.size(), equalTo(2));
  assertThat(
      emitted,
      containsInAnyOrder(
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(window(0, 10).maxTimestamp()),
              equalTo(window(0, 10))),
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v3")),
              equalTo(window(10, 20).maxTimestamp()),
              equalTo(window(10, 20)))));
}
Aggregations