Search in sources :

Example 21 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperatorTest method testLateDroppingForStatefulFn.

@Test
public void testLateDroppingForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10)));
    DoFn<Integer, String> fn = new DoFn<Integer, String>() {

        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(context.element().toString());
        }
    };
    WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder = WindowedValue.getFullCoder(VarIntCoder.of(), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<String> outputTag = new TupleTag<>("main-output");
    DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), VarIntCoder.of());
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<Integer>, Integer>() {

        @Override
        public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
            return integerWindowedValue.getValue();
        }
    }, new CoderTypeInformation<>(VarIntCoder.of()));
    testHarness.open();
    testHarness.processWatermark(0);
    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
    // this should not be late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(9);
    // this should still not be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(10);
    // this should now be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
    testHarness.close();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) Test(org.junit.Test)

Example 22 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperatorTest method testStateGCForStatefulFn.

@Test
public void testStateGCForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
    final String timerId = "boo";
    final String stateId = "dazzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            timer.set(window.maxTimestamp());
            state.write(context.element().getKey());
            context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            context.output(KV.of(state.read(), timerOutput));
        }
    };
    WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder = WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
    DoFnOperator<KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), StringUtf8Coder.of());
    KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<KV<String, Integer>>, String>() {

        @Override
        public String getKey(WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
            return kvWindowedValue.getValue().getKey();
        }
    }, new CoderTypeInformation<>(StringUtf8Coder.of()));
    testHarness.open();
    testHarness.processWatermark(0);
    assertEquals(0, testHarness.numKeyedStateEntries());
    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
    assertEquals(2, testHarness.numKeyedStateEntries());
    testHarness.getOutput().clear();
    // this should trigger both the window.maxTimestamp() timer and the GC timer
    // this tests that the GC timer fires after the user timer
    testHarness.processWatermark(window1.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS).getMillis());
    assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
    // ensure the state was garbage collected
    assertEquals(0, testHarness.numKeyedStateEntries());
    testHarness.close();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) Test(org.junit.Test)

Example 23 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperator method processElement2.

@Override
public final void processElement2(StreamRecord<RawUnionValue> streamRecord) throws Exception {
    pushbackDoFnRunner.startBundle();
    @SuppressWarnings("unchecked") WindowedValue<Iterable<?>> value = (WindowedValue<Iterable<?>>) streamRecord.getValue().getValue();
    PCollectionView<?> sideInput = sideInputTagMapping.get(streamRecord.getValue().getUnionTag());
    sideInputHandler.addSideInputValue(sideInput, value);
    BagState<WindowedValue<InputT>> pushedBack = pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
    List<WindowedValue<InputT>> newPushedBack = new ArrayList<>();
    Iterable<WindowedValue<InputT>> pushedBackContents = pushedBack.read();
    if (pushedBackContents != null) {
        for (WindowedValue<InputT> elem : pushedBackContents) {
            // we need to set the correct key in case the operator is
            // a (keyed) window operator
            setKeyContextElement1(new StreamRecord<>(elem));
            Iterable<WindowedValue<InputT>> justPushedBack = pushbackDoFnRunner.processElementInReadyWindows(elem);
            Iterables.addAll(newPushedBack, justPushedBack);
        }
    }
    pushedBack.clear();
    long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
    for (WindowedValue<InputT> pushedBackValue : newPushedBack) {
        min = Math.min(min, pushedBackValue.getTimestamp().getMillis());
        pushedBack.add(pushedBackValue);
    }
    setPushedBackWatermark(min);
    pushbackDoFnRunner.finishBundle();
    // maybe output a new watermark
    processWatermark1(new Watermark(currentInputWatermark));
}
Also used : ArrayList(java.util.ArrayList) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 24 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class FlinkStatefulDoFnFunction method reduce.

@Override
public void reduce(Iterable<WindowedValue<KV<K, V>>> values, Collector<WindowedValue<OutputT>> out) throws Exception {
    RuntimeContext runtimeContext = getRuntimeContext();
    DoFnRunners.OutputManager outputManager;
    if (outputMap.size() == 1) {
        outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
    } else {
        // it has some additional Outputs
        outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
    }
    final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();
    // get the first value, we need this for initializing the state internals with the key.
    // we are guaranteed to have a first value, otherwise reduce() would not have been called.
    WindowedValue<KV<K, V>> currentValue = iterator.next();
    final K key = currentValue.getValue().getKey();
    final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);
    // Used with Batch, we know that all the data is available for this key. We can't use the
    // timer manager from the context because it doesn't exist. So we create one and advance
    // time to the end after processing all elements.
    final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
    timerInternals.advanceProcessingTime(Instant.now());
    timerInternals.advanceSynchronizedProcessingTime(Instant.now());
    List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());
    DoFnRunner<KV<K, V>, OutputT> doFnRunner = DoFnRunners.simpleRunner(serializedOptions.getPipelineOptions(), dofn, new FlinkSideInputReader(sideInputs, runtimeContext), outputManager, mainOutputTag, additionalOutputTags, new FlinkNoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return stateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return timerInternals;
        }
    }, windowingStrategy);
    if ((serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    doFnRunner.startBundle();
    doFnRunner.processElement(currentValue);
    while (iterator.hasNext()) {
        currentValue = iterator.next();
        doFnRunner.processElement(currentValue);
    }
    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    fireEligibleTimers(timerInternals, doFnRunner);
    doFnRunner.finishBundle();
}
Also used : DoFnRunners(org.apache.beam.runners.core.DoFnRunners) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) Collector(org.apache.flink.util.Collector) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) KV(org.apache.beam.sdk.values.KV) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext)

Example 25 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class TriggerStateMachineTester method injectElements.

public final void injectElements(Collection<TimestampedValue<InputT>> values) throws Exception {
    for (TimestampedValue<InputT> value : values) {
        WindowTracing.trace("TriggerTester.injectElements: {}", value);
    }
    List<WindowedValue<InputT>> windowedValues = Lists.newArrayListWithCapacity(values.size());
    for (TimestampedValue<InputT> input : values) {
        try {
            InputT value = input.getValue();
            Instant timestamp = input.getTimestamp();
            Collection<W> assignedWindows = windowFn.assignWindows(new TestAssignContext<W>(windowFn, value, timestamp, GlobalWindow.INSTANCE));
            for (W window : assignedWindows) {
                activeWindows.addActiveForTesting(window);
                // Today, triggers assume onTimer firing at the watermark time, whether or not they
                // explicitly set the timer themselves. So this tester must set it.
                timerInternals.setTimer(TimerData.of(windowNamespace(window), window.maxTimestamp(), TimeDomain.EVENT_TIME));
            }
            windowedValues.add(WindowedValue.of(value, timestamp, assignedWindows, PaneInfo.NO_FIRING));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    for (WindowedValue<InputT> windowedValue : windowedValues) {
        for (BoundedWindow untypedWindow : windowedValue.getWindows()) {
            // SDK is responsible for type safety
            @SuppressWarnings("unchecked") W window = mergeResult((W) untypedWindow);
            TriggerStateMachine.OnElementContext context = contextFactory.createOnElementContext(window, new TestTimers(windowNamespace(window)), windowedValue.getTimestamp(), executableTrigger, getFinishedSet(window));
            if (!context.trigger().isFinished()) {
                executableTrigger.invokeOnElement(context);
            }
        }
    }
}
Also used : Instant(org.joda.time.Instant) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow)

Aggregations

WindowedValue (org.apache.beam.sdk.util.WindowedValue)89 Test (org.junit.Test)53 Instant (org.joda.time.Instant)47 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)36 KV (org.apache.beam.sdk.values.KV)19 ArrayList (java.util.ArrayList)17 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)17 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)17 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)17 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)16 TupleTag (org.apache.beam.sdk.values.TupleTag)13 JavaRDD (org.apache.spark.api.java.JavaRDD)8 ByteString (com.google.protobuf.ByteString)7 BeamFnApi (org.apache.beam.fn.v1.BeamFnApi)7 ThrowingConsumer (org.apache.beam.fn.harness.fn.ThrowingConsumer)6 IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)6 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)6 CloseableThrowingConsumer (org.apache.beam.fn.harness.fn.CloseableThrowingConsumer)5 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)5 EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext)5