Search in sources :

Example 61 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupRegistered.

/**
 * Processes a single element in a 10 ms fixed window and verifies that the step context is asked
 * to set a state cleanup timer for that window, one millisecond past the window's max timestamp.
 */
@Test
public void testCleanupRegistered() throws Exception {
    // Mocked collaborators: the step context is the object whose interaction is verified below.
    TimerInternals mockTimers = mock(TimerInternals.class);
    DataflowStepContext mockStep = mock(DataflowStepContext.class);
    when(mockStep.timerInternals()).thenReturn(mockTimers);

    CounterSet counterSet = new CounterSet();
    TestOperationContext opContext = TestOperationContext.create(counterSet);
    DataflowExecutionContext<DataflowStepContext> execContext =
        mock(DataflowExecutionContext.class);
    when(execContext.getStepContext(opContext)).thenReturn(mockStep);
    when(execContext.getSideInputReader(any(), any(), any()))
        .thenReturn(NullSideInputReader.empty());

    // Serialized form of a stateful DoFn windowed into 10 ms fixed windows.
    DoFn<?, ?> statefulFn = new TestStatefulDoFn();
    CloudObject serializedFn =
        getCloudObject(
            statefulFn,
            WindowingStrategy.globalDefault().withWindowFn(FixedWindows.of(Duration.millis(10))));

    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    ParDoFn underTest =
        factory.create(
            pipelineOptions,
            serializedFn,
            Collections.emptyList(),
            MAIN_OUTPUT,
            ImmutableMap.of(MAIN_OUTPUT, 0),
            execContext,
            opContext);

    Receiver sink = new OutputReceiver();
    underTest.startBundle(sink);

    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(10));
    underTest.processElement(
        WindowedValue.of("foo", new Instant(1), firstWindow, PaneInfo.NO_FIRING));

    // The same instant is expected for both trailing timestamp arguments of the cleanup timer.
    Instant expectedCleanupTime = firstWindow.maxTimestamp().plus(Duration.millis(1L));
    verify(mockStep)
        .setStateCleanupTimer(
            SimpleParDoFn.CLEANUP_TIMER_ID,
            firstWindow,
            IntervalWindow.getCoder(),
            expectedCleanupTime,
            expectedCleanupTime);
}
Also used : Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 62 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method testUnprocessedElements.

/**
 * A test that explicitly delays a side input so that the main input will have to be reprocessed,
 * testing that {@code finishBundle()} re-assembles the GBK outputs correctly.
 *
 * <p>The side input reader mock always reports "not ready", and the test then asserts that the
 * values 1, 13 and 15 all come back as unprocessed elements keyed by "hello" in the fabricated
 * window.
 */
@Test
public void testUnprocessedElements() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
    PCollection<KV<String, Integer>> mainInput = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.into(FixedWindows.of(Duration.millis(10))));
    final PCollectionView<List<Integer>> sideInput = pipeline.apply("Create side input", Create.of(42)).apply("Window side input", Window.into(FixedWindows.of(Duration.millis(10)))).apply("View side input", View.asList());
    TupleTag<Integer> mainOutput = new TupleTag<>();
    // Stateful DoFn with an empty process method; it declares the side input so the evaluator
    // has something to wait on.
    PCollection<Integer> produced = mainInput.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void process(ProcessContext c) {
        }
    }, mainOutput, TupleTagList.empty(), Collections.singletonList(sideInput), DoFnSchemaInformation.create(), Collections.emptyMap())).get(mainOutput).setCoder(VarIntCoder.of());
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory<>(mockEvaluationContext, options);
    // This will be the stateful ParDo from the expansion
    AppliedPTransform<PCollection<KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(any())).thenReturn(mockStepContext);
    when(mockEvaluationContext.createBundle(Matchers.<PCollection<Integer>>any())).thenReturn(mockUncommittedBundle);
    when(mockStepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());
    // And digging to check whether the window is ready; the reader is stubbed to always answer
    // "not ready".
    when(mockEvaluationContext.createSideInputReader(anyList())).thenReturn(mockSideInputReader);
    when(mockSideInputReader.isReady(Matchers.any(), Matchers.any())).thenReturn(false);
    // NOTE(review): this fabricated window ends at 9 although the pipeline above uses 10 ms
    // FixedWindows; presumably harmless because the input bundle is fabricated - confirm.
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    // A single bundle holding one keyed work item whose three elements (values 1, 13, 15) all
    // carry firstWindow and the same key.
    String key = "hello";
    WindowedValue<KV<String, Integer>> firstKv = WindowedValue.of(KV.of(key, 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING);
    WindowedValue<KeyedWorkItem<String, KV<String, Integer>>> gbkOutputElement = firstKv.withValue(KeyedWorkItems.elementsWorkItem("hello", ImmutableList.of(firstKv, firstKv.withValue(KV.of(key, 13)), firstKv.withValue(KV.of(key, 15)))));
    CommittedBundle<KeyedWorkItem<String, KV<String, Integer>>> inputBundle = BUNDLE_FACTORY.createBundle((PCollection<KeyedWorkItem<String, KV<String, Integer>>>) Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(producingTransform))).add(gbkOutputElement).commit(Instant.now());
    TransformEvaluator<KeyedWorkItem<String, KV<String, Integer>>> evaluator = factory.forApplication(producingTransform, inputBundle);
    evaluator.processElement(gbkOutputElement);
    // This should push back every element, in the appropriate window, as unprocessed keyed work
    // items. Since the keys are equal they are single-threaded
    TransformResult<KeyedWorkItem<String, KV<String, Integer>>> result = evaluator.finishBundle();
    // Flatten the integer payloads of every pushed-back work item for a single assertion.
    List<Integer> pushedBackInts = new ArrayList<>();
    for (WindowedValue<? extends KeyedWorkItem<String, KV<String, Integer>>> unprocessedElement : result.getUnprocessedElements()) {
        assertThat(Iterables.getOnlyElement(unprocessedElement.getWindows()), equalTo((BoundedWindow) firstWindow));
        assertThat(unprocessedElement.getValue().key(), equalTo("hello"));
        for (WindowedValue<KV<String, Integer>> windowedKv : unprocessedElement.getValue().elementsIterable()) {
            pushedBackInts.add(windowedKv.getValue().getValue());
        }
    }
    assertThat(pushedBackInts, containsInAnyOrder(1, 13, 15));
}
Also used : ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) Matchers.anyList(org.mockito.Matchers.anyList) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 63 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class MultiStepCombineTest method testMultiStepCombineWindowed.

/**
 * Applies a per-key combine to timestamped input under sliding windows of 6 ms that start every
 * 3 ms, then asserts the contents of three specific windows and of the overall output.
 */
@Test
public void testMultiStepCombineWindowed() {
    Duration windowSize = Duration.millis(6L);
    SlidingWindows slidingFn = SlidingWindows.of(windowSize).every(Duration.millis(3L));

    PCollection<KV<String, Long>> perKeyResults =
        pipeline
            .apply(
                Create.timestamped(
                    TimestampedValue.of(KV.of("foo", 1L), new Instant(1L)),
                    TimestampedValue.of(KV.of("bar", 2L), new Instant(2L)),
                    TimestampedValue.of(KV.of("bizzle", 3L), new Instant(3L)),
                    TimestampedValue.of(KV.of("bar", 4L), new Instant(4L)),
                    TimestampedValue.of(KV.of("bizzle", 11L), new Instant(11L))))
            .apply(Window.into(slidingFn))
            .apply(Combine.perKey(new MultiStepCombineFn()));

    // Windows under inspection, named by their start instant.
    IntervalWindow startingAtZero = new IntervalWindow(new Instant(0L), windowSize);
    IntervalWindow startingAtMinusThree = new IntervalWindow(new Instant(-3L), windowSize);
    IntervalWindow startingAtSix = new IntervalWindow(new Instant(6L), windowSize);

    PAssert.that("Windows should combine only elements in their windows", perKeyResults)
        .inWindow(startingAtZero)
        .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 6L), KV.of("bizzle", 3L));
    PAssert.that("Elements should appear in all the windows they are assigned to", perKeyResults)
        .inWindow(startingAtMinusThree)
        .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 2L));
    PAssert.that(perKeyResults)
        .inWindow(startingAtSix)
        .containsInAnyOrder(KV.of("bizzle", 11L));

    // Across all windows each result is listed once per window it was produced in.
    PAssert.that(perKeyResults)
        .containsInAnyOrder(
            KV.of("foo", 1L),
            KV.of("foo", 1L),
            KV.of("bar", 6L),
            KV.of("bar", 2L),
            KV.of("bar", 4L),
            KV.of("bizzle", 11L),
            KV.of("bizzle", 11L),
            KV.of("bizzle", 3L),
            KV.of("bizzle", 3L));

    pipeline.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) SlidingWindows(org.apache.beam.sdk.transforms.windowing.SlidingWindows) Test(org.junit.Test)

Example 64 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class WatermarkCallbackExecutorTest method noCallbacksShouldFire.

/**
 * Registers one callback for a ten-minute window and fires the watermark at the five-minute
 * mark; the latch is expected to still be closed after waiting 500 ms.
 */
@Test
public void noCallbacksShouldFire() throws Exception {
    Instant windowStart = new Instant(0L);
    Duration windowLength = Duration.standardMinutes(10);

    CountDownLatch pending = new CountDownLatch(1);
    WindowFn<Object, IntervalWindow> fixedFn = FixedWindows.of(windowLength);
    IntervalWindow window = new IntervalWindow(windowStart, windowStart.plus(windowLength));

    executor.callOnGuaranteedFiring(
        create, window, WindowingStrategy.of(fixedFn), new CountDownLatchCallback(pending));

    // Watermark stops halfway through the window.
    Instant halfway = windowStart.plus(Duration.standardMinutes(5));
    executor.fireForWatermark(create, halfway);

    boolean released = pending.await(500, TimeUnit.MILLISECONDS);
    assertThat(released, equalTo(false));
}
Also used : Instant(org.joda.time.Instant) CountDownLatch(java.util.concurrent.CountDownLatch) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 65 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class WatermarkCallbackExecutorTest method multipleCallbacksShouldFireFires.

/**
 * Registers two callbacks for the same ten-minute window and fires the watermark at the
 * ten-minute mark; the latch of count two is expected to open within 500 ms.
 */
@Test
public void multipleCallbacksShouldFireFires() throws Exception {
    Instant windowStart = new Instant(0L);
    Duration windowLength = Duration.standardMinutes(10);

    CountDownLatch pending = new CountDownLatch(2);
    WindowFn<Object, IntervalWindow> fixedFn = FixedWindows.of(windowLength);
    IntervalWindow window = new IntervalWindow(windowStart, windowStart.plus(windowLength));

    // Two independent registrations, each counting down the shared latch once.
    executor.callOnGuaranteedFiring(
        create, window, WindowingStrategy.of(fixedFn), new CountDownLatchCallback(pending));
    executor.callOnGuaranteedFiring(
        create, window, WindowingStrategy.of(fixedFn), new CountDownLatchCallback(pending));

    Instant watermark = windowStart.plus(Duration.standardMinutes(10));
    executor.fireForWatermark(create, watermark);

    boolean released = pending.await(500, TimeUnit.MILLISECONDS);
    assertThat(released, equalTo(true));
}
Also used : Instant(org.joda.time.Instant) CountDownLatch(java.util.concurrent.CountDownLatch) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)238 Test (org.junit.Test)214 Instant (org.joda.time.Instant)213 WindowedValue (org.apache.beam.sdk.util.WindowedValue)67 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)56 KV (org.apache.beam.sdk.values.KV)56 Duration (org.joda.time.Duration)33 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)32 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)20 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)20 ArrayList (java.util.ArrayList)16 TupleTag (org.apache.beam.sdk.values.TupleTag)16 HashMap (java.util.HashMap)14 PCollectionView (org.apache.beam.sdk.values.PCollectionView)14 Category (org.junit.experimental.categories.Category)13 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)12 FixedWindows (org.apache.beam.sdk.transforms.windowing.FixedWindows)12 ByteBuffer (java.nio.ByteBuffer)11 Map (java.util.Map)11 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11