Search in sources :

Example 1 with BundleProgressHandler

use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.

the class ExecutableStageDoFnOperatorTest method outputsAreTaggedCorrectly.

@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
    TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
    ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags = ImmutableMap.<TupleTag<?>, OutputTag<?>>builder().put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {
    }).put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {
    }).build();
    ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder().put(mainOutput, (Coder) coder).put(additionalOutput1, coder).put(additionalOutput2, coder).build();
    ImmutableMap<TupleTag<?>, Integer> tagsToIds = ImmutableMap.<TupleTag<?>, Integer>builder().put(mainOutput, 0).put(additionalOutput1, 1).put(additionalOutput2, 2).build();
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    WindowedValue<Integer> zero = WindowedValue.valueInGlobalWindow(0);
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory = new StageBundleFactory() {

        private boolean onceEmitted;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    return ImmutableMap.of("input", input -> {
                    /* Ignore input*/
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() throws Exception {
                    if (onceEmitted) {
                        return;
                    }
                    // Emit all values to the runner when the bundle is closed.
                    receiverFactory.create(mainOutput.getId()).accept(three);
                    receiverFactory.create(additionalOutput1.getId()).accept(four);
                    receiverFactory.create(additionalOutput2.getId()).accept(five);
                    onceEmitted = true;
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return processBundleDescriptor;
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() {
        }
    };
    // Wire the stage bundle factory into our context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness = new OneInputStreamOperatorTestHarness<>(operator);
    long watermark = testHarness.getCurrentWatermark() + 1;
    testHarness.open();
    testHarness.processElement(new StreamRecord<>(zero));
    testHarness.processWatermark(watermark);
    watermark++;
    testHarness.processWatermark(watermark);
    assertEquals(watermark, testHarness.getCurrentWatermark());
    // watermark hold until bundle complete
    assertEquals(0, testHarness.getOutput().size());
    // triggers finish bundle
    testHarness.close();
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(three));
    assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)), contains(new StreamRecord<>(four)));
    assertThat(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)), contains(new StreamRecord<>(five)));
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) OutputTag(org.apache.flink.util.OutputTag) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) KV(org.apache.beam.sdk.values.KV) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 2 with BundleProgressHandler

use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.

the class ExecutableStageDoFnOperator method open.

@Override
public void open() throws Exception {
    executableStage = ExecutableStage.fromPayload(payload);
    hasSdfProcessFn = hasSDF(executableStage);
    initializeUserState(executableStage, getKeyedStateBackend(), pipelineOptions);
    // TODO: Wire this into the distributed cache and make it pluggable.
    // TODO: Do we really want this layer of indirection when accessing the stage bundle factory?
    // It's a little strange because this operator is responsible for the lifetime of the stage
    // bundle "factory" (manager?) but not the job or Flink bundle factories. How do we make
    // ownership of the higher level "factories" explicit? Do we care?
    stageContext = contextFactory.get(jobInfo);
    stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
    stateRequestHandler = getStateRequestHandler(executableStage);
    progressHandler = new BundleProgressHandler() {

        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
            if (flinkMetricContainer != null) {
                flinkMetricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
            }
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
            if (flinkMetricContainer != null) {
                flinkMetricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
            }
        }
    };
    finalizationHandler = BundleFinalizationHandlers.inMemoryFinalizer(stageBundleFactory.getInstructionRequestHandler());
    checkpointHandler = getBundleCheckpointHandler(hasSdfProcessFn);
    minEventTimeTimerTimestampInCurrentBundle = Long.MAX_VALUE;
    minEventTimeTimerTimestampInLastBundle = Long.MAX_VALUE;
    super.setPreBundleCallback(this::preBundleStartCallback);
    super.setBundleFinishedCallback(this::finishBundleCallback);
    // This will call {@code createWrappingDoFnRunner} which needs the above dependencies.
    super.open();
}
Also used : BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)

Example 3 with BundleProgressHandler

use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.

the class FlinkExecutableStageFunctionTest method outputsAreTaggedCorrectly.

@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> four = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> five = WindowedValue.valueInGlobalWindow(5);
    Map<String, Integer> outputTagMap = ImmutableMap.of("one", 1, "two", 2, "three", 3);
    // We use a real StageBundleFactory here in order to exercise the output receiver factory.
    StageBundleFactory stageBundleFactory = new StageBundleFactory() {

        private boolean once;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    return ImmutableMap.of("input", input -> {
                    /* Ignore input*/
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void close() throws Exception {
                    if (once) {
                        return;
                    }
                    // Emit all values to the runner when the bundle is closed.
                    receiverFactory.create("one").accept(three);
                    receiverFactory.create("two").accept(four);
                    receiverFactory.create("three").accept(five);
                    once = true;
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return processBundleDescriptor;
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() throws Exception {
        }
    };
    // Wire the stage bundle factory into our context.
    when(stageContext.getStageBundleFactory(any())).thenReturn(stageBundleFactory);
    FlinkExecutableStageFunction<Integer> function = getFunction(outputTagMap);
    function.open(new Configuration());
    if (isStateful) {
        function.reduce(Collections.emptyList(), collector);
    } else {
        function.mapPartition(Collections.emptyList(), collector);
    }
    // Ensure that the tagged values sent to the collector have the correct union tags as specified
    // in the output map.
    verify(collector).collect(new RawUnionValue(1, three));
    verify(collector).collect(new RawUnionValue(2, four));
    verify(collector).collect(new RawUnionValue(3, five));
    verifyNoMoreInteractions(collector);
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Configuration(org.apache.flink.configuration.Configuration) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) KV(org.apache.beam.sdk.values.KV) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test)

Example 4 with BundleProgressHandler

use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.

the class FlinkExecutableStageFunction method open.

@Override
public void open(Configuration parameters) {
    FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
    // Register standard file systems.
    FileSystems.setDefaultPipelineOptions(options);
    executableStage = ExecutableStage.fromPayload(stagePayload);
    runtimeContext = getRuntimeContext();
    metricContainer = new FlinkMetricContainer(runtimeContext);
    // TODO: Wire this into the distributed cache and make it pluggable.
    stageContext = contextFactory.get(jobInfo);
    stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
    // NOTE: It's safe to reuse the state handler between partitions because each partition uses the
    // same backing runtime context and broadcast variables. We use checkState below to catch errors
    // in backward-incompatible Flink changes.
    stateRequestHandler = getStateRequestHandler(executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);
    progressHandler = new BundleProgressHandler() {

        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
            metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
            metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
        }
    };
    // TODO(BEAM-11021): Support bundle finalization in portable batch.
    finalizationHandler = bundleId -> {
        throw new UnsupportedOperationException("Portable Flink runner doesn't support bundle finalization in batch mode. For more details, please refer to https://issues.apache.org/jira/browse/BEAM-11021.");
    };
    bundleCheckpointHandler = getBundleCheckpointHandler(executableStage);
}
Also used : FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer)

Example 5 with BundleProgressHandler

use of org.apache.beam.runners.fnexecution.control.BundleProgressHandler in project beam by apache.

the class SparkExecutableStageFunction method getBundleProgressHandler.

private BundleProgressHandler getBundleProgressHandler() {
    String stageName = stagePayload.getInput();
    MetricsContainerImpl container = metricsAccumulator.value().getContainer(stageName);
    return new BundleProgressHandler() {

        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
            container.update(progress.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
            container.update(response.getMonitoringInfosList());
        }
    };
}
Also used : MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)

Aggregations

BundleProgressHandler (org.apache.beam.runners.fnexecution.control.BundleProgressHandler)6 ProcessBundleProgressResponse (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse)3 ProcessBundleResponse (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)3 BundleCheckpointHandler (org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler)3 BundleFinalizationHandler (org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler)3 OutputReceiverFactory (org.apache.beam.runners.fnexecution.control.OutputReceiverFactory)3 ProcessBundleDescriptors (org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors)3 RemoteBundle (org.apache.beam.runners.fnexecution.control.RemoteBundle)3 StageBundleFactory (org.apache.beam.runners.fnexecution.control.StageBundleFactory)3 TimerReceiverFactory (org.apache.beam.runners.fnexecution.control.TimerReceiverFactory)3 StateRequestHandler (org.apache.beam.runners.fnexecution.state.StateRequestHandler)3 FnDataReceiver (org.apache.beam.sdk.fn.data.FnDataReceiver)3 KV (org.apache.beam.sdk.values.KV)3 Test (org.junit.Test)3 RawUnionValue (org.apache.beam.sdk.transforms.join.RawUnionValue)2 WindowedValue (org.apache.beam.sdk.util.WindowedValue)2 ArrayList (java.util.ArrayList)1 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)1 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)1 FlinkPipelineOptions (org.apache.beam.runners.flink.FlinkPipelineOptions)1