Search in sources :

Example 1 with BundleCheckpointHandler

use of org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler in project beam by apache.

the class ExecutableStageDoFnOperatorTest method outputsAreTaggedCorrectly.

@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
    TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
    ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags = ImmutableMap.<TupleTag<?>, OutputTag<?>>builder().put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {
    }).put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {
    }).build();
    ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder().put(mainOutput, (Coder) coder).put(additionalOutput1, coder).put(additionalOutput2, coder).build();
    ImmutableMap<TupleTag<?>, Integer> tagsToIds = ImmutableMap.<TupleTag<?>, Integer>builder().put(mainOutput, 0).put(additionalOutput1, 1).put(additionalOutput2, 2).build();
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    WindowedValue<Integer> zero = WindowedValue.valueInGlobalWindow(0);
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory = new StageBundleFactory() {

        private boolean onceEmitted;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    return ImmutableMap.of("input", input -> {
                    /* Ignore input*/
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() throws Exception {
                    if (onceEmitted) {
                        return;
                    }
                    // Emit all values to the runner when the bundle is closed.
                    receiverFactory.create(mainOutput.getId()).accept(three);
                    receiverFactory.create(additionalOutput1.getId()).accept(four);
                    receiverFactory.create(additionalOutput2.getId()).accept(five);
                    onceEmitted = true;
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return processBundleDescriptor;
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() {
        }
    };
    // Wire the stage bundle factory into our context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
    long watermark = testHarness.getCurrentWatermark() + 1;
    testHarness.open();
    testHarness.processElement(new StreamRecord<>(zero));
    testHarness.processWatermark(watermark);
    watermark++;
    testHarness.processWatermark(watermark);
    assertEquals(watermark, testHarness.getCurrentWatermark());
    // watermark hold until bundle complete
    assertEquals(0, testHarness.getOutput().size());
    // triggers finish bundle
    testHarness.close();
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(three));
    assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)), contains(new StreamRecord<>(four)));
    assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)), contains(new StreamRecord<>(five)));
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) OutputTag(org.apache.flink.util.OutputTag) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) KV(org.apache.beam.sdk.values.KV) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 2 with BundleCheckpointHandler

use of org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler in project beam by apache.

the class FlinkExecutableStageFunction method getBundleCheckpointHandler.

private BundleCheckpointHandler getBundleCheckpointHandler(ExecutableStage executableStage) {
    if (!hasSDF(executableStage)) {
        sdfStateInternals = null;
        sdfStateInternals = null;
        return response -> {
            throw new UnsupportedOperationException("Self-checkpoint is only supported on splittable DoFn.");
        };
    }
    sdfTimerInternals = new InMemoryTimerInternals();
    sdfStateInternals = InMemoryStateInternals.forKey("sdf_state");
    return new BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler(key -> sdfTimerInternals, key -> sdfStateInternals, inputCoder, windowCoder);
}
Also used : RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) LoggerFactory(org.slf4j.LoggerFactory) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) GuardedBy(javax.annotation.concurrent.GuardedBy) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Preconditions(org.apache.flink.util.Preconditions) List(java.util.List) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) StateTags(org.apache.beam.runners.core.StateTags) BundleCheckpointHandlers(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandlers) ArrayList(java.util.ArrayList) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Collector(org.apache.flink.util.Collector) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateInternals(org.apache.beam.runners.core.StateInternals) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Logger(org.slf4j.Logger) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Configuration(org.apache.flink.configuration.Configuration) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) AbstractRichFunction(org.apache.flink.api.common.functions.AbstractRichFunction) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals)

Example 3 with BundleCheckpointHandler

use of org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler in project beam by apache.

the class FlinkExecutableStageFunctionTest method outputsAreTaggedCorrectly.

@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    Map<String, Integer> outputTagMap = ImmutableMap.of("one", 1, "two", 2, "three", 3);
    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory = new StageBundleFactory() {

        private boolean once;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    return ImmutableMap.of("input", input -> {
                    /* Ignore input*/
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() throws Exception {
                    if (once) {
                        return;
                    }
                    // Emit all values to the runner when the bundle is closed.
                    receiverFactory.create("one").accept(three);
                    receiverFactory.create("two").accept(four);
                    receiverFactory.create("three").accept(five);
                    once = true;
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return processBundleDescriptor;
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() throws Exception {
        }
    };
    // Wire the stage bundle factory into our context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
    FlinkExecutableStageFunction<Integer> function = getFunction(outputTagMap);
    function.open(new Configuration());
    if (isStateful) {
        function.reduce(Collections.emptyList(), collector);
    } else {
        function.mapPartition(Collections.emptyList(), collector);
    }
    // Ensure that the tagged values sent to the collector have the correct union tags as specified
    // in the output map.
    verify(collector).collect(new RawUnionValue(1, three));
    verify(collector).collect(new RawUnionValue(2, four));
    verify(collector).collect(new RawUnionValue(3, five));
    verifyNoMoreInteractions(collector);
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Configuration(org.apache.flink.configuration.Configuration) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) KV(org.apache.beam.sdk.values.KV) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test)

Example 4 with BundleCheckpointHandler

use of org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler in project beam by apache.

the class SparkExecutableStageFunctionTest method outputsAreTaggedCorrectly.

@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    Map<String, Integer> outputTagMap = ImmutableMap.of("one", 1, "two", 2, "three", 3);
    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory = new StageBundleFactory() {

        private boolean once;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    return ImmutableMap.of("input", input -> {
                    /* Ignore input*/
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() throws Exception {
                    if (once) {
                        return;
                    }
                    // Emit all values to the runner when the bundle is closed.
                    receiverFactory.create("one").accept(three);
                    receiverFactory.create("two").accept(four);
                    receiverFactory.create("three").accept(five);
                    once = true;
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return Mockito.mock(ProcessBundleDescriptors.ExecutableProcessBundleDescriptor.class);
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() {
        }
    };
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
    SparkExecutableStageFunction<Integer, ?> function = getFunction(outputTagMap);
    List<WindowedValue<Integer>> inputs = new ArrayList<>();
    inputs.add(WindowedValue.valueInGlobalWindow(0));
    Iterator<RawUnionValue> iterator = function.call(inputs.iterator());
    Iterable<RawUnionValue> iterable = () -> iterator;
    assertThat(iterable, contains(new RawUnionValue(1, three), new RawUnionValue(2, four), new RawUnionValue(3, five)));
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test)

Aggregations

BundleCheckpointHandler (org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler)4 BundleFinalizationHandler (org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler)4 BundleProgressHandler (org.apache.beam.runners.fnexecution.control.BundleProgressHandler)4 OutputReceiverFactory (org.apache.beam.runners.fnexecution.control.OutputReceiverFactory)4 ProcessBundleDescriptors (org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors)4 RemoteBundle (org.apache.beam.runners.fnexecution.control.RemoteBundle)4 StageBundleFactory (org.apache.beam.runners.fnexecution.control.StageBundleFactory)4 TimerReceiverFactory (org.apache.beam.runners.fnexecution.control.TimerReceiverFactory)4 StateRequestHandler (org.apache.beam.runners.fnexecution.state.StateRequestHandler)4 FnDataReceiver (org.apache.beam.sdk.fn.data.FnDataReceiver)4 RawUnionValue (org.apache.beam.sdk.transforms.join.RawUnionValue)3 WindowedValue (org.apache.beam.sdk.util.WindowedValue)3 KV (org.apache.beam.sdk.values.KV)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)2 Coder (org.apache.beam.sdk.coders.Coder)2 Configuration (org.apache.flink.configuration.Configuration)2 IOException (java.io.IOException)1 EnumMap (java.util.EnumMap)1