Search in sources :

Example 1 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class TransformExecutorTest method callWithNullInputBundleFinishesBundleAndCompletes.

@Test
public void callWithNullInputBundleFinishesBundleAndCompletes() throws Exception {
    final TransformResult<Object> result = StepTransformResult.withoutHold(createdProducer).build();
    final AtomicBoolean finishCalled = new AtomicBoolean(false);
    TransformEvaluator<Object> evaluator = new TransformEvaluator<Object>() {

        @Override
        public void processElement(WindowedValue<Object> element) throws Exception {
            throw new IllegalArgumentException("Shouldn't be called");
        }

        @Override
        public TransformResult<Object> finishBundle() throws Exception {
            finishCalled.set(true);
            return result;
        }
    };
    when(registry.forApplication(createdProducer, null)).thenReturn(evaluator);
    TransformExecutor<Object> executor = TransformExecutor.create(evaluationContext, registry, Collections.<ModelEnforcementFactory>emptyList(), null, createdProducer, completionCallback, transformEvaluationState);
    executor.run();
    assertThat(finishCalled.get(), is(true));
    assertThat(completionCallback.handledResult, Matchers.<TransformResult<?>>equalTo(result));
    assertThat(completionCallback.handledException, is(nullValue()));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Test(org.junit.Test)

Example 2 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class SparkGroupAlsoByWindowViaWindowSet method groupAlsoByWindow.

public static <K, InputT, W extends BoundedWindow> JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupAlsoByWindow(JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> inputDStream, final Coder<K> keyCoder, final Coder<WindowedValue<InputT>> wvCoder, final WindowingStrategy<?, W> windowingStrategy, final SparkRuntimeContext runtimeContext, final List<Integer> sourceIds) {
    final IterableCoder<WindowedValue<InputT>> itrWvCoder = IterableCoder.of(wvCoder);
    final Coder<InputT> iCoder = ((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder();
    final Coder<? extends BoundedWindow> wCoder = ((FullWindowedValueCoder<InputT>) wvCoder).getWindowCoder();
    final Coder<WindowedValue<KV<K, Iterable<InputT>>>> wvKvIterCoder = FullWindowedValueCoder.of(KvCoder.of(keyCoder, IterableCoder.of(iCoder)), wCoder);
    final TimerInternals.TimerDataCoder timerDataCoder = TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
    long checkpointDurationMillis = runtimeContext.getPipelineOptions().as(SparkPipelineOptions.class).getCheckpointDurationMillis();
    // we have to switch to Scala API to avoid Optional in the Java API, see: SPARK-4819.
    // we also have a broader API for Scala (access to the actual key and entire iterator).
    // we use coders to convert objects in the PCollection to byte arrays, so they
    // can be transferred over the network for the shuffle and be in serialized form
    // for checkpointing.
    // for readability, we add comments with actual type next to byte[].
    // to shorten line length, we use:
    //---- WV: WindowedValue
    //---- Iterable: Itr
    //---- AccumT: A
    //---- InputT: I
    DStream<Tuple2<ByteArray, byte[]>> /*Itr<WV<I>>*/
    pairDStream = inputDStream.transformToPair(new Function<JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>>, JavaPairRDD<ByteArray, byte[]>>() {

        // we use mapPartitions with the RDD API because its the only available API
        // that allows to preserve partitioning.
        @Override
        public JavaPairRDD<ByteArray, byte[]> call(JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> rdd) throws Exception {
            return rdd.mapPartitions(TranslationUtils.functionToFlatMapFunction(WindowingHelpers.<KV<K, Iterable<WindowedValue<InputT>>>>unwindowFunction()), true).mapPartitionsToPair(TranslationUtils.<K, Iterable<WindowedValue<InputT>>>toPairFlatMapFunction(), true).mapPartitionsToPair(TranslationUtils.pairFunctionToPairFlatMapFunction(CoderHelpers.toByteFunction(keyCoder, itrWvCoder)), true);
        }
    }).dstream();
    PairDStreamFunctions<ByteArray, byte[]> pairDStreamFunctions = DStream.toPairDStreamFunctions(pairDStream, JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(), JavaSparkContext$.MODULE$.<byte[]>fakeClassTag(), null);
    int defaultNumPartitions = pairDStreamFunctions.defaultPartitioner$default$1();
    Partitioner partitioner = pairDStreamFunctions.defaultPartitioner(defaultNumPartitions);
    // use updateStateByKey to scan through the state and update elements and timers.
    DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> /*WV<KV<K, Itr<I>>>*/
    firedStream = pairDStreamFunctions.updateStateByKey(new SerializableFunction1<scala.collection.Iterator<Tuple3</*K*/
    ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
    List<byte[]>>>>>, scala.collection.Iterator<Tuple2</*K*/
    ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
    List<byte[]>>>>>() {

        @Override
        public scala.collection.Iterator<Tuple2</*K*/
        ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
        List<byte[]>>>> apply(final scala.collection.Iterator<Tuple3</*K*/
        ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
        List<byte[]>>>>> iter) {
            //--- ACTUAL STATEFUL OPERATION:
            //
            // Input Iterator: the partition (~bundle) of a cogrouping of the input
            // and the previous state (if exists).
            //
            // Output Iterator: the output key, and the updated state.
            //
            // possible input scenarios for (K, Seq, Option<S>):
            // (1) Option<S>.isEmpty: new data with no previous state.
            // (2) Seq.isEmpty: no new data, but evaluating previous state (timer-like behaviour).
            // (3) Seq.nonEmpty && Option<S>.isDefined: new data with previous state.
            final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn = SystemReduceFn.buffering(((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder());
            final OutputWindowedValueHolder<K, InputT> outputHolder = new OutputWindowedValueHolder<>();
            // use in memory Aggregators since Spark Accumulators are not resilient
            // in stateful operators, once done with this partition.
            final MetricsContainerImpl cellProvider = new MetricsContainerImpl("cellProvider");
            final CounterCell droppedDueToClosedWindow = cellProvider.getCounter(MetricName.named(SparkGroupAlsoByWindowViaWindowSet.class, GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER));
            final CounterCell droppedDueToLateness = cellProvider.getCounter(MetricName.named(SparkGroupAlsoByWindowViaWindowSet.class, GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER));
            AbstractIterator<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> /*WV<KV<K, Itr<I>>>*/
            outIter = new AbstractIterator<Tuple2</*K*/
            ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
            List<byte[]>>>>() {

                @Override
                protected Tuple2</*K*/
                ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
                List<byte[]>>> computeNext() {
                    // (possibly) previous-state and (possibly) new data.
                    while (iter.hasNext()) {
                        // for each element in the partition:
                        Tuple3<ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, List<byte[]>>>> next = iter.next();
                        ByteArray encodedKey = next._1();
                        K key = CoderHelpers.fromByteArray(encodedKey.getValue(), keyCoder);
                        Seq<byte[]> seq = next._2();
                        Option<Tuple2<StateAndTimers, List<byte[]>>> prevStateAndTimersOpt = next._3();
                        SparkStateInternals<K> stateInternals;
                        SparkTimerInternals timerInternals = SparkTimerInternals.forStreamFromSources(sourceIds, GlobalWatermarkHolder.get());
                        // get state(internals) per key.
                        if (prevStateAndTimersOpt.isEmpty()) {
                            // no previous state.
                            stateInternals = SparkStateInternals.forKey(key);
                        } else {
                            // with pre-existing state.
                            StateAndTimers prevStateAndTimers = prevStateAndTimersOpt.get()._1();
                            stateInternals = SparkStateInternals.forKeyAndState(key, prevStateAndTimers.getState());
                            Collection<byte[]> serTimers = prevStateAndTimers.getTimers();
                            timerInternals.addTimers(SparkTimerInternals.deserializeTimers(serTimers, timerDataCoder));
                        }
                        ReduceFnRunner<K, InputT, Iterable<InputT>, W> reduceFnRunner = new ReduceFnRunner<>(key, windowingStrategy, ExecutableTriggerStateMachine.create(TriggerStateMachines.stateMachineForTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()))), stateInternals, timerInternals, outputHolder, new UnsupportedSideInputReader("GroupAlsoByWindow"), reduceFn, runtimeContext.getPipelineOptions());
                        // clear before potential use.
                        outputHolder.clear();
                        if (!seq.isEmpty()) {
                            // new input for key.
                            try {
                                Iterable<WindowedValue<InputT>> elementsIterable = CoderHelpers.fromByteArray(seq.head(), itrWvCoder);
                                Iterable<WindowedValue<InputT>> validElements = LateDataUtils.dropExpiredWindows(key, elementsIterable, timerInternals, windowingStrategy, droppedDueToLateness);
                                reduceFnRunner.processElements(validElements);
                            } catch (Exception e) {
                                throw new RuntimeException("Failed to process element with ReduceFnRunner", e);
                            }
                        } else if (stateInternals.getState().isEmpty()) {
                            // no input and no state -> GC evict now.
                            continue;
                        }
                        try {
                            // advance the watermark to HWM to fire by timers.
                            timerInternals.advanceWatermark();
                            // call on timers that are ready.
                            reduceFnRunner.onTimers(timerInternals.getTimersReadyToProcess());
                        } catch (Exception e) {
                            throw new RuntimeException("Failed to process ReduceFnRunner onTimer.", e);
                        }
                        // this is mostly symbolic since actual persist is done by emitting output.
                        reduceFnRunner.persist();
                        // obtain output, if fired.
                        List<WindowedValue<KV<K, Iterable<InputT>>>> outputs = outputHolder.get();
                        if (!outputs.isEmpty() || !stateInternals.getState().isEmpty()) {
                            StateAndTimers updated = new StateAndTimers(stateInternals.getState(), SparkTimerInternals.serializeTimers(timerInternals.getTimers(), timerDataCoder));
                            // persist Spark's state by outputting.
                            List<byte[]> serOutput = CoderHelpers.toByteArrays(outputs, wvKvIterCoder);
                            return new Tuple2<>(encodedKey, new Tuple2<>(updated, serOutput));
                        }
                    // an empty state with no output, can be evicted completely - do nothing.
                    }
                    return endOfData();
                }
            };
            // log if there's something to log.
            long lateDropped = droppedDueToLateness.getCumulative();
            if (lateDropped > 0) {
                LOG.info(String.format("Dropped %d elements due to lateness.", lateDropped));
                droppedDueToLateness.inc(-droppedDueToLateness.getCumulative());
            }
            long closedWindowDropped = droppedDueToClosedWindow.getCumulative();
            if (closedWindowDropped > 0) {
                LOG.info(String.format("Dropped %d elements due to closed window.", closedWindowDropped));
                droppedDueToClosedWindow.inc(-droppedDueToClosedWindow.getCumulative());
            }
            return scala.collection.JavaConversions.asScalaIterator(outIter);
        }
    }, partitioner, true, JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
    if (checkpointDurationMillis > 0) {
        firedStream.checkpoint(new Duration(checkpointDurationMillis));
    }
    // go back to Java now.
    JavaPairDStream<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> /*WV<KV<K, Itr<I>>>*/
    javaFiredStream = JavaPairDStream.fromPairDStream(firedStream, JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(), JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
    // filter state-only output (nothing to fire) and remove the state from the output.
    return javaFiredStream.filter(new Function<Tuple2</*K*/
    ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
    List<byte[]>>>, Boolean>() {

        @Override
        public Boolean call(Tuple2</*K*/
        ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
        List<byte[]>>> t2) throws Exception {
            // filter output if defined.
            return !t2._2()._2().isEmpty();
        }
    }).flatMap(new FlatMapFunction<Tuple2</*K*/
    ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
    List<byte[]>>>, WindowedValue<KV<K, Iterable<InputT>>>>() {

        @Override
        public Iterable<WindowedValue<KV<K, Iterable<InputT>>>> call(Tuple2</*K*/
        ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/
        List<byte[]>>> t2) throws Exception {
            // return in serialized form.
            return CoderHelpers.fromByteArrays(t2._2()._2(), wvKvIterCoder);
        }
    });
}
Also used : MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) CounterCell(org.apache.beam.runners.core.metrics.CounterCell) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) ByteArray(org.apache.beam.runners.spark.util.ByteArray) List(java.util.List) ArrayList(java.util.ArrayList) ReduceFnRunner(org.apache.beam.runners.core.ReduceFnRunner) SystemReduceFn(org.apache.beam.runners.core.SystemReduceFn) Duration(org.apache.spark.streaming.Duration) TimerInternals(org.apache.beam.runners.core.TimerInternals) Collection(java.util.Collection) Option(scala.Option) Seq(scala.collection.Seq) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Function(org.apache.spark.api.java.function.Function) UnsupportedSideInputReader(org.apache.beam.runners.core.UnsupportedSideInputReader) AbstractIterator(com.google.common.collect.AbstractIterator) AbstractIterator(com.google.common.collect.AbstractIterator) Partitioner(org.apache.spark.Partitioner) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) KV(org.apache.beam.sdk.values.KV) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) JavaRDD(org.apache.spark.api.java.JavaRDD) Tuple2(scala.Tuple2) Tuple3(scala.Tuple3)

Example 3 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class BoundedSourceRunnerTest method testStart.

@Test
public void testStart() throws Exception {
    List<WindowedValue<Long>> outValues = new ArrayList<>();
    Map<String, Collection<ThrowingConsumer<WindowedValue<Long>>>> outputMap = ImmutableMap.of("out", ImmutableList.of(outValues::add));
    ByteString encodedSource = ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));
    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(PipelineOptionsFactory.create(), BeamFnApi.FunctionSpec.newBuilder().setData(Any.pack(BytesValue.newBuilder().setValue(encodedSource).build())).build(), outputMap);
    runner.start();
    assertThat(outValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
}
Also used : BoundedSource(org.apache.beam.sdk.io.BoundedSource) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ByteString(com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) Collection(java.util.Collection) ByteString(com.google.protobuf.ByteString) Test(org.junit.Test)

Example 4 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperatorTest method testLateDroppingForStatefulFn.

@Test
public void testLateDroppingForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10)));
    DoFn<Integer, String> fn = new DoFn<Integer, String>() {

        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(context.element().toString());
        }
    };
    WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder = WindowedValue.getFullCoder(VarIntCoder.of(), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<String> outputTag = new TupleTag<>("main-output");
    DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), VarIntCoder.of());
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<Integer>, Integer>() {

        @Override
        public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
            return integerWindowedValue.getValue();
        }
    }, new CoderTypeInformation<>(VarIntCoder.of()));
    testHarness.open();
    testHarness.processWatermark(0);
    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
    // this should not be late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(9);
    // this should still not be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
    testHarness.getOutput().clear();
    testHarness.processWatermark(10);
    // this should now be considered late
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
    testHarness.close();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) Test(org.junit.Test)

Example 5 with WindowedValue

use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.

the class DoFnOperatorTest method testStateGCForStatefulFn.

@Test
public void testStateGCForStatefulFn() throws Exception {
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
    final String timerId = "boo";
    final String stateId = "dazzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            timer.set(window.maxTimestamp());
            state.write(context.element().getKey());
            context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            context.output(KV.of(state.read(), timerOutput));
        }
    };
    WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder = WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
    DoFnOperator<KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), StringUtf8Coder.of());
    KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<KV<String, Integer>>, String>() {

        @Override
        public String getKey(WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
            return kvWindowedValue.getValue().getKey();
        }
    }, new CoderTypeInformation<>(StringUtf8Coder.of()));
    testHarness.open();
    testHarness.processWatermark(0);
    assertEquals(0, testHarness.numKeyedStateEntries());
    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
    testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
    assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
    assertEquals(2, testHarness.numKeyedStateEntries());
    testHarness.getOutput().clear();
    // this should trigger both the window.maxTimestamp() timer and the GC timer
    // this tests that the GC timer fires after the user timer
    testHarness.processWatermark(window1.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS).getMillis());
    assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
    // ensure the state was garbage collected
    assertEquals(0, testHarness.numKeyedStateEntries());
    testHarness.close();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) Test(org.junit.Test)

Aggregations

WindowedValue (org.apache.beam.sdk.util.WindowedValue)297 Test (org.junit.Test)165 Instant (org.joda.time.Instant)102 KV (org.apache.beam.sdk.values.KV)95 ArrayList (java.util.ArrayList)90 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)73 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)70 HashMap (java.util.HashMap)59 TupleTag (org.apache.beam.sdk.values.TupleTag)52 Coder (org.apache.beam.sdk.coders.Coder)49 KvCoder (org.apache.beam.sdk.coders.KvCoder)47 List (java.util.List)38 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)35 Map (java.util.Map)34 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)34 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)31 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)30 PCollection (org.apache.beam.sdk.values.PCollection)30 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)27 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)27