Search in sources :

Example 1 with StateTag

use of org.apache.beam.runners.core.StateTag in project beam by apache.

the class DoFnOperatorTest method testWatermarkUpdateAfterWatermarkHoldRelease.

@Test
public void testWatermarkUpdateAfterWatermarkHoldRelease() throws Exception {
    Coder<WindowedValue<KV<String, String>>> coder = WindowedValue.getValueOnlyCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
    TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
    List<Long> emittedWatermarkHolds = new ArrayList<>();
    KeySelector<WindowedValue<KV<String, String>>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue().getKey(), StringUtf8Coder.of());
    DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = new DoFnOperator<KV<String, String>, KV<String, String>>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
    Collections.emptyList(), /* side inputs */
    FlinkPipelineOptions.defaults(), StringUtf8Coder.of(), keySelector, DoFnSchemaInformation.create(), Collections.emptyMap()) {

        @Override
        protected DoFnRunner<KV<String, String>, KV<String, String>> createWrappingDoFnRunner(DoFnRunner<KV<String, String>, KV<String, String>> wrappedRunner, StepContext stepContext) {
            StateNamespace namespace = StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
            StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.LATEST);
            WatermarkHoldState holdState = stepContext.stateInternals().state(namespace, holdTag);
            TimerInternals timerInternals = stepContext.timerInternals();
            return new DoFnRunner<KV<String, String>, KV<String, String>>() {

                @Override
                public void startBundle() {
                    wrappedRunner.startBundle();
                }

                @Override
                public void processElement(WindowedValue<KV<String, String>> elem) {
                    wrappedRunner.processElement(elem);
                    holdState.add(elem.getTimestamp());
                    timerInternals.setTimer(namespace, "timer", "family", elem.getTimestamp().plus(Duration.millis(1)), elem.getTimestamp().plus(Duration.millis(1)), TimeDomain.EVENT_TIME);
                    timerInternals.setTimer(namespace, "cleanup", "", GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE.maxTimestamp(), TimeDomain.EVENT_TIME);
                }

                @Override
                public <KeyT> void onTimer(String timerId, String timerFamilyId, KeyT key, BoundedWindow window, Instant timestamp, Instant outputTimestamp, TimeDomain timeDomain) {
                    if ("cleanup".equals(timerId)) {
                        holdState.clear();
                    } else {
                        holdState.add(outputTimestamp);
                    }
                }

                @Override
                public void finishBundle() {
                    wrappedRunner.finishBundle();
                }

                @Override
                public <KeyT> void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {
                    wrappedRunner.onWindowExpiration(window, timestamp, key);
                }

                @Override
                public DoFn<KV<String, String>, KV<String, String>> getFn() {
                    return doFn;
                }
            };
        }

        @Override
        void emitWatermarkIfHoldChanged(long currentWatermarkHold) {
            emittedWatermarkHolds.add(keyedStateInternals.minWatermarkHoldMs());
        }
    };
    OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    testHarness.setup();
    Instant now = Instant.now();
    testHarness.open();
    // process first element, set hold to `now', setup timer for `now + 1'
    testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now)));
    assertThat(emittedWatermarkHolds, is(equalTo(Collections.singletonList(now.getMillis()))));
    // fire timer, change hold to `now + 2'
    testHarness.processWatermark(now.getMillis() + 2);
    assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1))));
    // process second element, verify we emitted changed hold
    testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now.plus(Duration.millis(2)))));
    assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1, now.getMillis() + 2))));
    testHarness.processWatermark(GlobalWindow.INSTANCE.maxTimestamp().plus(Duration.millis(1)).getMillis());
    testHarness.processWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
    testHarness.close();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) StepContext(org.apache.beam.runners.core.StepContext) ValueState(org.apache.beam.sdk.state.ValueState) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) TypeFactory(com.fasterxml.jackson.databind.type.TypeFactory) Create(org.apache.beam.sdk.transforms.Create) TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) LRUMap(com.fasterxml.jackson.databind.util.LRUMap) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FluentIterable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.FluentIterable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) KeySelector(org.apache.flink.api.java.functions.KeySelector) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) OutputTag(org.apache.flink.util.OutputTag) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Collectors(java.util.stream.Collectors) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Objects(java.util.Objects) List(java.util.List) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Timer(org.apache.beam.sdk.state.Timer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) StateTag(org.apache.beam.runners.core.StateTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) Whitebox(org.powermock.reflect.Whitebox) KV(org.apache.beam.sdk.values.KV) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) Supplier(java.util.function.Supplier) StateTags(org.apache.beam.runners.core.StateTags) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimerSpec(org.apache.beam.sdk.state.TimerSpec) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Before(org.junit.Before) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionViewTesting(org.apache.beam.sdk.testing.PCollectionViewTesting) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Function(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) StateSpecs(org.apache.beam.sdk.state.StateSpecs) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) StepContext(org.apache.beam.runners.core.StepContext) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Instant(org.joda.time.Instant) TimeDomain(org.apache.beam.sdk.state.TimeDomain) KV(org.apache.beam.sdk.values.KV) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) Test(org.junit.Test)

Aggregations

TypeFactory (com.fasterxml.jackson.databind.type.TypeFactory)1 LRUMap (com.fasterxml.jackson.databind.util.LRUMap)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 Supplier (java.util.function.Supplier)1 Collectors (java.util.stream.Collectors)1 DoFnRunner (org.apache.beam.runners.core.DoFnRunner)1 StateNamespace (org.apache.beam.runners.core.StateNamespace)1 StateNamespaces (org.apache.beam.runners.core.StateNamespaces)1 StateTag (org.apache.beam.runners.core.StateTag)1 StateTags (org.apache.beam.runners.core.StateTags)1 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)1 StepContext (org.apache.beam.runners.core.StepContext)1 TimerInternals (org.apache.beam.runners.core.TimerInternals)1