Search in sources :

Example 1 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class BatchGroupAlsoByWindowReshuffleDoFnTest method makeRunner.

private static <K, InputT, OutputT, W extends BoundedWindow> DoFnRunner<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> makeRunner(GroupAlsoByWindowDoFnFactory<K, InputT, OutputT> fnFactory, WindowingStrategy<?, W> windowingStrategy, TupleTag<KV<K, OutputT>> outputTag, DoFnRunners.OutputManager outputManager) {
    final StepContext stepContext = new TestStepContext(STEP_NAME);
    StateInternalsFactory<K> stateInternalsFactory = key -> stepContext.stateInternals();
    BatchGroupAlsoByWindowFn<K, InputT, OutputT> fn = fnFactory.forStrategy(windowingStrategy, stateInternalsFactory);
    return new GroupAlsoByWindowFnRunner<>(PipelineOptionsFactory.create(), fn, NullSideInputReader.empty(), outputManager, outputTag, stepContext);
}
Also used : Arrays(java.util.Arrays) KV(org.apache.beam.sdk.values.KV) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) StepContext(org.apache.beam.runners.core.StepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) GroupAlsoByWindowFnRunner(org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner) TupleTag(org.apache.beam.sdk.values.TupleTag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StateInternals(org.apache.beam.runners.core.StateInternals) ExpectedException(org.junit.rules.ExpectedException) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) GroupAlsoByWindowDoFnFactory(org.apache.beam.runners.dataflow.worker.util.GroupAlsoByWindowProperties.GroupAlsoByWindowDoFnFactory) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) List(java.util.List) Rule(org.junit.Rule) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.equalTo(org.hamcrest.Matchers.equalTo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StepContext(org.apache.beam.runners.core.StepContext) GroupAlsoByWindowFnRunner(org.apache.beam.runners.dataflow.worker.GroupAlsoByWindowFnRunner)

Example 2 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class SplittableParDoProcessKeyedElementsOp method open.

@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<byte[]>> timerRegistry, OpEmitter<RawUnionValue> emitter) {
    this.pipelineOptions = Base64Serializer.deserializeUnchecked(config.get("beamPipelineOptions"), SerializablePipelineOptions.class).get().as(SamzaPipelineOptions.class);
    final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory = SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(transformId, context.getTaskContext(), pipelineOptions);
    final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);
    this.stateInternalsFactory = new SamzaStoreStateInternals.Factory<>(transformId, Collections.singletonMap(SamzaStoreStateInternals.BEAM_STORE, SamzaStoreStateInternals.getBeamStore(context.getTaskContext())), ByteArrayCoder.of(), pipelineOptions.getStoreBatchGetSize());
    this.timerInternalsFactory = SamzaTimerInternalsFactory.createTimerInternalFactory(ByteArrayCoder.of(), timerRegistry, TIMER_STATE_ID, nonKeyedStateInternalsFactory, windowingStrategy, isBounded, pipelineOptions);
    final KeyedInternals<byte[]> keyedInternals = new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
    SplittableParDoViaKeyedWorkItems.ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> processFn = processElements.newProcessFn(processElements.getFn());
    DoFnInvokers.tryInvokeSetupFor(processFn, pipelineOptions);
    processFn.setStateInternalsFactory(stateInternalsFactory);
    processFn.setTimerInternalsFactory(timerInternalsFactory);
    processFn.setSideInputReader(NullSideInputReader.empty());
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(processElements.getFn(), pipelineOptions, new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputWindowedValue(mainOutputTag, output, timestamp, windows, pane);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    }, NullSideInputReader.empty(), Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10), () -> {
        throw new UnsupportedOperationException("BundleFinalizer unsupported in Samza");
    }));
    final StepContext stepContext = new StepContext() {

        @Override
        public StateInternals stateInternals() {
            return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
            return keyedInternals.timerInternals();
        }
    };
    this.fnRunner = DoFnRunners.simpleRunner(pipelineOptions, processFn, NullSideInputReader.of(Collections.emptyList()), outputManager, mainOutputTag, Collections.emptyList(), stepContext, null, Collections.emptyMap(), windowingStrategy, DoFnSchemaInformation.create(), Collections.emptyMap());
}
Also used : StepContext(org.apache.beam.runners.core.StepContext) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) SplittableParDoViaKeyedWorkItems(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) Collection(java.util.Collection) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions)

Example 3 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class DoFnOperatorTest method testWatermarkUpdateAfterWatermarkHoldRelease.

@Test
public void testWatermarkUpdateAfterWatermarkHoldRelease() throws Exception {
    Coder<WindowedValue<KV<String, String>>> coder = WindowedValue.getValueOnlyCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
    TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
    List<Long> emittedWatermarkHolds = new ArrayList<>();
    KeySelector<WindowedValue<KV<String, String>>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue().getKey(), StringUtf8Coder.of());
    DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = new DoFnOperator<KV<String, String>, KV<String, String>>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
    Collections.emptyList(), /* side inputs */
    FlinkPipelineOptions.defaults(), StringUtf8Coder.of(), keySelector, DoFnSchemaInformation.create(), Collections.emptyMap()) {

        @Override
        protected DoFnRunner<KV<String, String>, KV<String, String>> createWrappingDoFnRunner(DoFnRunner<KV<String, String>, KV<String, String>> wrappedRunner, StepContext stepContext) {
            StateNamespace namespace = StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
            StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.LATEST);
            WatermarkHoldState holdState = stepContext.stateInternals().state(namespace, holdTag);
            TimerInternals timerInternals = stepContext.timerInternals();
            return new DoFnRunner<KV<String, String>, KV<String, String>>() {

                @Override
                public void startBundle() {
                    wrappedRunner.startBundle();
                }

                @Override
                public void processElement(WindowedValue<KV<String, String>> elem) {
                    wrappedRunner.processElement(elem);
                    holdState.add(elem.getTimestamp());
                    timerInternals.setTimer(namespace, "timer", "family", elem.getTimestamp().plus(Duration.millis(1)), elem.getTimestamp().plus(Duration.millis(1)), TimeDomain.EVENT_TIME);
                    timerInternals.setTimer(namespace, "cleanup", "", GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE.maxTimestamp(), TimeDomain.EVENT_TIME);
                }

                @Override
                public <KeyT> void onTimer(String timerId, String timerFamilyId, KeyT key, BoundedWindow window, Instant timestamp, Instant outputTimestamp, TimeDomain timeDomain) {
                    if ("cleanup".equals(timerId)) {
                        holdState.clear();
                    } else {
                        holdState.add(outputTimestamp);
                    }
                }

                @Override
                public void finishBundle() {
                    wrappedRunner.finishBundle();
                }

                @Override
                public <KeyT> void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {
                    wrappedRunner.onWindowExpiration(window, timestamp, key);
                }

                @Override
                public DoFn<KV<String, String>, KV<String, String>> getFn() {
                    return doFn;
                }
            };
        }

        @Override
        void emitWatermarkIfHoldChanged(long currentWatermarkHold) {
            emittedWatermarkHolds.add(keyedStateInternals.minWatermarkHoldMs());
        }
    };
    OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    testHarness.setup();
    Instant now = Instant.now();
    testHarness.open();
    // process first element, set hold to `now', setup timer for `now + 1'
    testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now)));
    assertThat(emittedWatermarkHolds, is(equalTo(Collections.singletonList(now.getMillis()))));
    // fire timer, change hold to `now + 2'
    testHarness.processWatermark(now.getMillis() + 2);
    assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1))));
    // process second element, verify we emitted changed hold
    testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now.plus(Duration.millis(2)))));
    assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1, now.getMillis() + 2))));
    testHarness.processWatermark(GlobalWindow.INSTANCE.maxTimestamp().plus(Duration.millis(1)).getMillis());
    testHarness.processWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
    testHarness.close();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) StepContext(org.apache.beam.runners.core.StepContext) ValueState(org.apache.beam.sdk.state.ValueState) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) TypeFactory(com.fasterxml.jackson.databind.type.TypeFactory) Create(org.apache.beam.sdk.transforms.Create) TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) LRUMap(com.fasterxml.jackson.databind.util.LRUMap) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FluentIterable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.FluentIterable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) KeySelector(org.apache.flink.api.java.functions.KeySelector) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) OutputTag(org.apache.flink.util.OutputTag) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Collectors(java.util.stream.Collectors) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Objects(java.util.Objects) List(java.util.List) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Timer(org.apache.beam.sdk.state.Timer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) StateTag(org.apache.beam.runners.core.StateTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) Whitebox(org.powermock.reflect.Whitebox) KV(org.apache.beam.sdk.values.KV) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) Supplier(java.util.function.Supplier) StateTags(org.apache.beam.runners.core.StateTags) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimerSpec(org.apache.beam.sdk.state.TimerSpec) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Before(org.junit.Before) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionViewTesting(org.apache.beam.sdk.testing.PCollectionViewTesting) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Function(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) StateSpecs(org.apache.beam.sdk.state.StateSpecs) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) StepContext(org.apache.beam.runners.core.StepContext) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Instant(org.joda.time.Instant) TimeDomain(org.apache.beam.sdk.state.TimeDomain) KV(org.apache.beam.sdk.values.KV) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) Test(org.junit.Test)

Example 4 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class DoFnOperator method open.

@Override
public void open() throws Exception {
    // WindowDoFnOperator need use state and timer to get DoFn.
    // So must wait StateInternals and TimerInternals ready.
    // This will be called after initializeState()
    this.doFn = getDoFn();
    FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
    doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
    StepContext stepContext = new FlinkStepContext();
    doFnRunner = DoFnRunners.simpleRunner(options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation, sideInputMapping);
    if (requiresStableInput) {
        // put this in front of the root FnRunner before any additional wrappers
        doFnRunner = bufferingDoFnRunner = BufferingDoFnRunner.create(doFnRunner, "stable-input-buffer", windowedInputCoder, windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(), getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
    }
    doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
    earlyBindStateIfNeeded();
    if (!options.getDisableMetrics()) {
        flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
        String checkpointMetricNamespace = options.getReportCheckpointDuration();
        if (checkpointMetricNamespace != null) {
            MetricName checkpointMetric = MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
            checkpointStats = new CheckpointStats(() -> flinkMetricContainer.getMetricsContainer(stepName).getDistribution(checkpointMetric));
        }
    }
    elementCount = 0L;
    lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();
    // Schedule timer to check timeout of finish bundle.
    long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
    checkFinishBundleTimer = getProcessingTimeService().scheduleAtFixedRate(timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);
    if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
        pushbackDoFnRunner = new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
    } else {
        pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
    }
    bundleFinalizer = new InMemoryBundleFinalizer();
    pendingFinalizations = new LinkedHashMap<>();
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) InternalTimeServiceManager(org.apache.flink.streaming.api.operators.InternalTimeServiceManager) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) Joiner(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) TimerInternals(org.apache.beam.runners.core.TimerInternals) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Map(java.util.Map) InternalTimerService(org.apache.flink.streaming.api.operators.InternalTimerService) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) FlinkBroadcastStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) InternalTimer(org.apache.flink.streaming.api.operators.InternalTimer) OutputTag(org.apache.flink.util.OutputTag) Serializable(java.io.Serializable) Workarounds(org.apache.beam.runners.flink.translation.utils.Workarounds) Stream(java.util.stream.Stream) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) VoidNamespace(org.apache.flink.runtime.state.VoidNamespace) KV(org.apache.beam.sdk.values.KV) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) InternalPriorityQueue(org.apache.flink.runtime.state.InternalPriorityQueue) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) Output(org.apache.flink.streaming.api.operators.Output) StateInternals(org.apache.beam.runners.core.StateInternals) SideInputReader(org.apache.beam.runners.core.SideInputReader) DoFn(org.apache.beam.sdk.transforms.DoFn) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) IOException(java.io.IOException) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) MapState(org.apache.flink.api.common.state.MapState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) FileSystems(org.apache.beam.sdk.io.FileSystems) TimeDomain(org.apache.beam.sdk.state.TimeDomain) SplittableParDoViaKeyedWorkItems(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems) StateSpec(org.apache.beam.sdk.state.StateSpec) ScheduledFuture(java.util.concurrent.ScheduledFuture) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) LoggerFactory(org.slf4j.LoggerFactory) StepContext(org.apache.beam.runners.core.StepContext) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ListState(org.apache.flink.api.common.state.ListState) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) CheckpointStats(org.apache.beam.runners.flink.translation.utils.CheckpointStats) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) KeySelector(org.apache.flink.api.java.functions.KeySelector) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) List(java.util.List) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) Optional(java.util.Optional) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StateAndTimerBundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler) Coder(org.apache.beam.sdk.coders.Coder) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) ProcessFnRunner(org.apache.beam.runners.core.ProcessFnRunner) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) DoFnRunnerWithMetricsUpdate(org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate) OutputStream(java.io.OutputStream) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Triggerable(org.apache.flink.streaming.api.operators.Triggerable) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner) InMemoryBundleFinalizer(org.apache.beam.runners.core.InMemoryBundleFinalizer) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) BufferingDoFnRunner(org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner) InputStream(java.io.InputStream) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) StepContext(org.apache.beam.runners.core.StepContext) CheckpointStats(org.apache.beam.runners.flink.translation.utils.CheckpointStats) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) MetricName(org.apache.beam.sdk.metrics.MetricName) InMemoryBundleFinalizer(org.apache.beam.runners.core.InMemoryBundleFinalizer) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner) BufferingDoFnRunner(org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer)

Example 5 with StepContext

use of org.apache.beam.runners.core.StepContext in project beam by apache.

the class PartialGroupByKeyParDoFns method create.

public static <K, InputT, AccumT> ParDoFn create(PipelineOptions options, KvCoder<K, ?> inputElementCoder, @Nullable CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos, List<Receiver> receivers, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    AppliedCombineFn<K, InputT, AccumT, ?> combineFn;
    SideInputReader sideInputReader;
    StepContext stepContext;
    if (cloudUserFn == null) {
        combineFn = null;
        sideInputReader = NullSideInputReader.empty();
        stepContext = null;
    } else {
        Object deserializedFn = SerializableUtils.deserializeFromByteArray(getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized combine fn");
        @SuppressWarnings("unchecked") AppliedCombineFn<K, InputT, AccumT, ?> combineFnUnchecked = ((AppliedCombineFn<K, InputT, AccumT, ?>) deserializedFn);
        combineFn = combineFnUnchecked;
        sideInputReader = executionContext.getSideInputReader(sideInputInfos, combineFn.getSideInputViews(), operationContext);
        stepContext = executionContext.getStepContext(operationContext);
    }
    return create(options, inputElementCoder, combineFn, sideInputReader, receivers.get(0), stepContext);
}
Also used : StepContext(org.apache.beam.runners.core.StepContext) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject)

Aggregations

StepContext (org.apache.beam.runners.core.StepContext)9 TimerInternals (org.apache.beam.runners.core.TimerInternals)5 DoFnRunners (org.apache.beam.runners.core.DoFnRunners)4 StateInternals (org.apache.beam.runners.core.StateInternals)4 KV (org.apache.beam.sdk.values.KV)4 List (java.util.List)3 DoFnRunner (org.apache.beam.runners.core.DoFnRunner)3 NullSideInputReader (org.apache.beam.runners.core.NullSideInputReader)3 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)3 TupleTag (org.apache.beam.sdk.values.TupleTag)3 Instant (org.joda.time.Instant)3 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 InMemoryStateInternals (org.apache.beam.runners.core.InMemoryStateInternals)2